Load the required libraries.
library(tidyverse)
library(sf)
library(here)
library(readxl)
library(scales)
library(DT)
library(brms)
library(tidybayes)
library(patchwork)
library(marginaleffects)
library(ggrepel)
library(scico)
library(ggdensity)
library(ggpubr)
library(units)
#library(ggsn)
Functions that we will use throughout the script
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass miniature chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
Function for counterfactual plots
#' Plot modelled case notification rates against a no-intervention counterfactual.
#'
#' Draws posterior expected case notification rates (per 100,000) from a fitted
#' brms model, overlays the observed points, and adds a counterfactual trend
#' obtained by re-predicting with `acf_period` forced to "a. pre-acf", i.e. as
#' if the 1957 mass miniature X-ray campaign had not happened.
#'
#' @param model_data Data frame used to fit `model`; must contain `year`,
#'   `year2`, `y_num`, `acf_period`, plus the denominator/outcome columns.
#' @param model Fitted brms model, passed to tidybayes::add_epred_draws().
#' @param population_denominator Unquoted denominator column; rates are
#'   computed as .epred / denominator * 100,000.
#' @param outcome Unquoted observed-outcome column (plotted as points).
#' @param grouping_var Optional unquoted grouping column kept in summaries.
#' @param re_formula Random-effects specification forwarded to
#'   add_epred_draws() (e.g. NA for population-level predictions).
#' @param ... Optional bare faceting variables, captured with vars().
#' @return A ggplot object.
plot_counterfactual <- function(model_data, model, population_denominator, outcome, grouping_var=NULL, re_formula,...){
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass miniature chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
#Posterior expected rates under the actual (fitted) intervention history,
#summarised as mean and 95% interval per year/period/group.
summary <- {{model_data}} %>%
select(year, year2, y_num, acf_period, {{population_denominator}}, {{outcome}}, {{grouping_var}}) %>%
add_epred_draws({{model}}, re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
#Convert expected counts (and interval bounds) to rates per 100,000.
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
#NOTE(review): "b. acf" rows fall through case_when() to NA here —
#presumably intentional (the campaign period itself is excluded from the
#legend via na.translate = F below); confirm.
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, {{outcome}}) %>%
mutate(acf_period = "a. pre-acf"), re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#plot the intervention effect
#Counterfactual ribbon/line drawn only from 1956 onwards, where it diverges
#from the fitted pre-intervention trend.
p <- summary %>%
droplevels() %>%
ggplot() +
geom_ribbon(aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = counterfact %>% filter(year>=1956),
aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = counterfact %>% filter(year>=1956),
aes(y=.epred_inc, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred_inc, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = {{model_data}}, aes(y={{outcome}}, x=year2, shape=acf_period), size=2) +
#NOTE(review): only acf_end is marked, but the caption says "between dashed
#lines" (plural) — confirm whether a vline at acf_start was intended too.
geom_vline(aes(xintercept=acf_end), linetype=3) +
theme_ggdist() +
scale_y_continuous(labels=comma, limits = c(0,NA)) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="", na.translate = F) +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="", na.translate = F) +
scale_shape_discrete(name="", na.translate = F) +
labs(
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA),
title = element_text(size=14),
axis.text.x = element_text(size=10, angle = 90, hjust=1, vjust=0.5),
legend.text = element_text(size=12)) +
guides(shape="none")
#Facet only when the caller supplied faceting variables via `...`.
facet_vars <- vars(...)
if (length(facet_vars) != 0) {
p <- p + facet_wrap(facet_vars)
}
p
}
Function for calculating measures of change over time (RR.peak, RR.level, RR.slope)
#' Summarise intervention effect measures from a fitted brms model.
#'
#' Computes posterior draws and interval summaries for three relative measures:
#' * RR.peak  — case notification rate in 1957 (y_num == 8) during the ACF
#'   period vs. the counterfactual (pre-ACF) trend for 1957.
#' * RR.level — rate in 1958 (y_num == 9) post-ACF vs. counterfactual 1958.
#' * RR.slope — annualised change in rate over 1958-1963 (y_num 9 to 14)
#'   post-ACF vs. the counterfactual slope over the same years.
#'
#' @param model_data Data used to fit `model`; must contain `y_num` and the
#'   denominator column.
#' @param model Fitted brms model.
#' @param population_denominator Unquoted denominator column (rates per 100,000).
#' @param grouping_var Optional unquoted grouping column (e.g. ward/division).
#' @param re_formula Random-effects formula forwarded to add_epred_draws().
#' @return Named list: peak_draws/peak_summary, level_draws/level_summary,
#'   slope_draws/slope_summary.
summarise_change <- function(model_data, model, population_denominator, grouping_var = NULL, re_formula = NULL) {
  grouping_var <- enquo(grouping_var)
  # FIX: the original tested `!is.null({{grouping_var}})`, which is always
  # TRUE because an enquo()-ed argument is a quosure, never NULL.
  # quo_is_null() is the correct test for an absent tidy-eval argument.
  has_grouping <- !rlang::quo_is_null(grouping_var)

  # RR.peak: relative case notification rate in 1957 vs. counterfactual ----
  # Prediction grid: the 1957 rows, duplicated for pre-ACF vs. ACF periods.
  if (has_grouping) {
    out <- crossing({{model_data}} %>%
                      select({{population_denominator}}, y_num, !!grouping_var) %>%
                      filter(y_num == 8),
                    acf_period = c("a. pre-acf", "b. acf")
    )
  } else {
    out <- crossing({{model_data}} %>%
                      select({{population_denominator}}, y_num) %>%
                      filter(y_num == 8),
                    acf_period = c("a. pre-acf", "b. acf")
    )
  }
  peak_draws <- add_epred_draws(newdata = out,
                                object = {{model}},
                                re_formula = {{re_formula}}) %>%
    # FIX: use the supplied denominator; the original hard-coded
    # `population_without_inst_ship`, silently ignoring the parameter.
    mutate(epred_cnr = .epred/{{population_denominator}}*100000) %>%
    # Within each draw the pre-ACF row precedes the ACF row (crossing()
    # sorts), so last/first = ACF rate over counterfactual rate.
    group_by(.draw, !!grouping_var) %>%
    summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
    ungroup() %>%
    mutate(measure = "RR.peak")
  peak_summary <- peak_draws %>%
    group_by(!!grouping_var) %>%
    mean_qi(estimate) %>%
    mutate(measure = "RR.peak")

  # RR.level: relative case notification rate in 1958 vs. counterfactual ----
  # (Comment in the original said "RR.step"; the measure stored is RR.level.)
  if (has_grouping) {
    out2 <- crossing({{model_data}} %>%
                       select({{population_denominator}}, y_num, !!grouping_var) %>%
                       filter(y_num == 9),
                     acf_period = c("a. pre-acf", "c. post-acf")
    )
  } else {
    out2 <- crossing({{model_data}} %>%
                       select({{population_denominator}}, y_num) %>%
                       filter(y_num == 9),
                     acf_period = c("a. pre-acf", "c. post-acf")
    )
  }
  level_draws <- add_epred_draws(newdata = out2,
                                 object = {{model}},
                                 re_formula = {{re_formula}}) %>%
    arrange(y_num, .draw) %>%
    mutate(epred_cnr = .epred/{{population_denominator}}*100000) %>%
    group_by(.draw, !!grouping_var) %>%
    summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
    ungroup() %>%
    mutate(measure = "RR.level")
  level_summary <- level_draws %>%
    group_by(!!grouping_var) %>%
    mean_qi(estimate) %>%
    mutate(measure = "RR.level")

  # RR.slope: change in rate 1958-1963 vs. counterfactual trend ----
  if (has_grouping) {
    out3 <- crossing({{model_data}} %>%
                       select({{population_denominator}}, y_num, !!grouping_var) %>%
                       filter(y_num %in% c(9,14)),
                     acf_period = c("a. pre-acf", "c. post-acf")
    )
  } else {
    out3 <- crossing({{model_data}} %>%
                       select({{population_denominator}}, y_num) %>%
                       filter(y_num %in% c(9,14)),
                     acf_period = c("a. pre-acf", "c. post-acf")
    )
  }
  slope_draws <- add_epred_draws(newdata = out3,
                                 object = {{model}},
                                 re_formula = {{re_formula}}) %>%
    arrange(y_num) %>%
    ungroup() %>%
    mutate(epred_cnr = .epred/{{population_denominator}}*100000) %>%
    # Per-draw annualised slope within each period, then the ratio of the
    # post-ACF slope to the counterfactual (pre-ACF) slope.
    group_by(.draw, acf_period, !!grouping_var) %>%
    summarise(slope = (last(epred_cnr) - first(epred_cnr)) / (last(y_num)-first(y_num))) %>%
    ungroup() %>%
    group_by(.draw, !!grouping_var) %>%
    summarise(estimate = last(slope)/first(slope)) %>%
    mutate(measure = "RR.slope")
  slope_summary <- slope_draws %>%
    group_by(!!grouping_var) %>%
    # NOTE(review): median_qi here vs. mean_qi for the other measures —
    # presumably deliberate (slope ratios can be heavy-tailed); confirm.
    median_qi(estimate) %>%
    mutate(measure = "RR.slope")

  # Gather all the results into a named list.
  lst(peak_draws=peak_draws, peak_summary=peak_summary,
      level_draws=level_draws, level_summary=level_summary,
      slope_draws=slope_draws, slope_summary=slope_summary)
}
Function for calculating difference from counterfactual
#' Calculate differences between observed and counterfactual predictions.
#'
#' For each posterior draw, compares predictions under the fitted intervention
#' history with predictions where `acf_period` is forced to "a. pre-acf"
#' (no intervention), for the years after 1957. Returns per-year summaries
#' (cases averted, percentage change, absolute and relative rate differences)
#' and an overall (all post-1957 years pooled) summary.
#'
#' @param model_data Data frame used to fit `model`; must contain `year`,
#'   `year2`, `y_num`, `acf_period` and the denominator column.
#' @param model Fitted brms model.
#' @param population_denominator Unquoted denominator column (per 100,000).
#' @param grouping_var Optional unquoted grouping column.
#' @param re_formula Random-effects specification; default NA means
#'   population-level predictions (brms convention).
#' @return Named list: `counter_post` (per-year) and `counter_post_overall`.
calculate_counterfactual <- function(model_data, model, population_denominator, grouping_var=NULL, re_formula=NA){
#effect vs. counterfactual
#Per-draw counterfactual predictions (acf_period forced to pre-ACF level).
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc_counterf = .epred/{{population_denominator}}*100000, .epred_counterf=.epred) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, .draw, .epred_counterf, .epred_inc_counterf, {{grouping_var}})
#Calculate case notification rate per draw, then summarise.
post_change <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000) %>%
filter(year>1957) %>%
ungroup() %>%
#NOTE(review): {{grouping_var}} appears twice in this select() — harmless
#(tidyselect keeps one copy) but presumably unintended.
select(year, {{population_denominator}}, {{grouping_var}}, .draw, .epred, .epred_inc, {{grouping_var}})
#for the overall period
#Total counterfactual cases per draw, summed over all post-1957 years.
counterfact_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, .draw, .epred, {{grouping_var}}) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred_counterf = sum(.epred))
#Calculate case notification rate per draw, then summarise.
post_change_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, {{grouping_var}}, .draw, .epred) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred = sum(.epred))
#Join observed vs. counterfactual draws (matched on year/draw/grouping via
#natural join), compute effect measures per draw, then summarise.
counter_post <-
left_join(counterfact, post_change) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf,
diff_inc100k = .epred_inc - .epred_inc_counterf,
rr_inc100k = .epred_inc/.epred_inc_counterf) %>%
group_by(year, {{grouping_var}}) %>%
mean_qi(cases_averted, pct_change, diff_inc100k, rr_inc100k) %>%
ungroup()
counter_post_overall <-
left_join(counterfact_overall, post_change_overall) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by({{grouping_var}}) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup()
lst(counter_post, counter_post_overall)
}
Function for tidying up counterfactuals (mostly for making nice tables)
#' Format per-year counterfactual comparisons for presentation tables.
#'
#' Expects the `counter_post` element returned by calculate_counterfactual():
#' point estimates with `.lower`/`.upper` columns for `cases_averted`,
#' `pct_change`, `diff_inc100k` and `rr_inc100k`, plus a `year` column.
#'
#' @param data Data frame of per-year counterfactual summaries.
#' @return `data` with each measure collapsed to an
#'   "estimate (lower to upper)" display string.
tidy_counterfactuals <- function(data){
  data %>%
    mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper),
                  number_format(accuracy = 0.1, big.mark = ","))) %>%
    mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
    # FIX: passing `accuracy` through across()'s `...` is deprecated
    # (dplyr >= 1.1); use an anonymous function instead.
    mutate(across(c(pct_change:pct_change.upper), ~ percent(.x, accuracy = 0.1))) %>%
    mutate(year = as.character(year),
           cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
           pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"),
           diff_inc = glue::glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
           rr_inc = glue::glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"))
}
#' Format pooled (all post-1957 years) counterfactual comparisons for tables.
#'
#' Expects the `counter_post_overall` element from calculate_counterfactual():
#' point estimates with `.lower`/`.upper` columns for `cases_averted` and
#' `pct_change`.
#'
#' @param data Data frame of overall counterfactual summaries.
#' @return `data` with each measure collapsed to an
#'   "estimate (lower to upper)" display string.
tidy_counterfactuals_overall <- function(data){
  out <- data %>%
    mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
    # FIX: passing `accuracy` through across()'s `...` is deprecated
    # (dplyr >= 1.1); use an anonymous function instead.
    mutate(across(c(pct_change:pct_change.upper), ~ percent(.x, accuracy = 0.1)))
  # FIX: the overall summary has no `year` column (it pools across years);
  # the original unconditionally referenced `year`, which errors unless a
  # `year` object happens to exist in the calling environment. Convert it
  # only when the column is actually present.
  if ("year" %in% names(out)) {
    out <- out %>% mutate(year = as.character(year))
  }
  out %>%
    mutate(cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
           pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"))
}
Import datasets for analysis
Make a map of Glasgow wards
glasgow_wards_1951 <- st_read(here("mapping/glasgow_wards_1951.geojson"))
Reading layer `glasgow_wards_1951' from data source
`/Users/petermacpherson/Dropbox/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/glasgow_wards_1951.geojson'
using driver `GeoJSON'
Simple feature collection with 37 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -4.393502 ymin: 55.77464 xmax: -4.070411 ymax: 55.92814
Geodetic CRS: WGS 84
#read in Scotland boundary
scotland <- st_read(here("mapping/Scotland_boundary/Scotland boundary.shp"))
Reading layer `Scotland boundary' from data source
`/Users/petermacpherson/Dropbox/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/Scotland_boundary/Scotland boundary.shp'
using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 1 field
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 5513 ymin: 530249 xmax: 470332 ymax: 1220302
Projected CRS: OSGB36 / British National Grid
#make a bounding box for Glasgow
bbox <- st_bbox(glasgow_wards_1951) |> st_as_sfc()
#plot scotland with a bounding box around the City of Glasgow
scotland_with_bbox <- ggplot() +
geom_sf(data = scotland, fill="antiquewhite") +
geom_sf(data = bbox, colour = "#C60C30", fill="antiquewhite") +
theme_void() +
theme(panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "#EAF7FA", size = 0.3))
#plot the wards
#note we tidy up some names to fit on map
glasgow_ward_map <- glasgow_wards_1951 %>%
mutate(ward = case_when(ward=="Shettleston and Tollcross" ~ "Shettleston and\nTollcross",
ward=="Partick (West)" ~ "Partick\n(West)",
ward=="Partick (East)" ~ "Partick\n(East)",
ward=="North Kelvin" ~ "North\nKelvin",
ward=="Kinning Park" ~ "Kinning\nPark",
TRUE ~ ward)) %>%
ggplot() +
geom_sf(aes(fill=division)) +
geom_sf_label(aes(label = ward), size=3, fill=NA, label.size = NA, colour="black") +
#scale_colour_identity() +
scale_fill_brewer(palette = "Set3", name="City of Glasgow Division") +
theme_grey() +
labs(x="",
y="",
fill="Division") +
theme(legend.position = "top",
panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "antiquewhite", size = 0.3),
panel.grid.major = element_line(color = "grey78")) +
guides(fill=guide_legend(title.position = "top", title.hjust = 0.5, title.theme = element_text(face="bold")))
#add the map of scotland as an inset
glasgow_ward_map + inset_element(scotland_with_bbox, 0.75, 0, 1, 0.4)
ggsave(here("figures/s1.png"), height=10, width = 12)
NA
NA
Calculate areas per geographical unit
sf_use_s2(FALSE) #https://github.com/r-spatial/sf/issues/1762
glasgow_wards_1951 <- glasgow_wards_1951 %>%
mutate(area = st_area(glasgow_wards_1951))
glasgow_wards_1951$area_km <- units::set_units(glasgow_wards_1951$area, km^2)
Make division shape files, and calculate area (stopped working, need to fix!)
# glasgow_divisions_1951 <- glasgow_wards_1951 %>%
# group_by(division) %>%
# summarize(geometry = st_union(geometry)) %>%
# nngeo::st_remove_holes() %>%
# mutate(area = st_area(glasgow_divisions_1951))
#
# glasgow_divisions_1951$area_km <- units::set_units(glasgow_divisions_1951$area, km^2)
Load in the datasets for denominators, and check for consistency.
overall_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "overall_population")
overall_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
overall_pops <- overall_pops %>%
mutate(year2 = year+0.5)
Note, we have three population estimates:
(Population in shipping is estimated from the 1951 census, so is the same for most years)
First, plot the total population
overall_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2), alpha=0.5, colour = "mediumseagreen", fill="mediumseagreen") +
geom_point(aes(y=total_population, x=year2), colour = "mediumseagreen") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: total population",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
Now the population excluding institutionalised and shipping population
overall_pops %>%
ggplot() +
geom_area(aes(y=population_without_inst_ship, x=year2), alpha=0.5, colour = "purple", fill="purple") +
geom_point(aes(y=population_without_inst_ship, x=year2), colour = "purple") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: population excluding institutionalised and shipping",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
There are 5 Divisions containing 37 Wards in the Glasgow Corporation, with consistent boundaries over time.
#look-up table for divisions and wards
ward_lookup <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "divisions_wards")
ward_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "ward_population")
ward_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
ward_pops <- ward_pops %>%
mutate(year2 = year+0.5)
#Get the Division population
division_pops <- ward_pops %>%
group_by(division, year) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship, na.rm = TRUE),
institutions = sum(institutions, na.rm = TRUE),
shipping = sum(shipping, na.rm = TRUE),
total_population = sum(total_population, na.rm = TRUE))
`summarise()` has grouped output by 'division'. You can override using the `.groups` argument.
division_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Plot the overall population by Division and Ward
division_pops %>%
mutate(year2 = year+0.5) %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(division~.) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name = "") +
scale_colour_brewer(palette = "Set3", name = "") +
labs(
title = "Glasgow Corporation: total population by Division",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
ward_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(ward~., ncol=6) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name="Division") +
scale_colour_brewer(palette = "Set3", name = "Division") +
labs(
title = "Glasgow City: total population by Ward",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
ggsave(here("figures/s2.png"), height=10, width=12)
Approximately, how many person-years of follow-up do we have?
overall_pops %>%
ungroup() %>%
summarise(across(year, length, .names = "years"),
across(c(population_without_inst_ship, total_population), sum)) %>%
mutate(across(where(is.double), comma)) %>%
datatable()
NA
NA
Change in population by ward
ward_pops %>%
group_by(ward) %>%
summarise(pct_change_pop = (last(population_without_inst_ship) - first(population_without_inst_ship))/first(population_without_inst_ship)) %>%
mutate(pct_change_pop = percent(pct_change_pop)) %>%
arrange(pct_change_pop) %>%
datatable()
NA
NA
NA
Output population density by ward and division for regression modelling
Wards first
(stopped working, need to fix)
# ward_covariates <- glasgow_wards_1951 %>%
# left_join(ward_pops) %>%
# mutate(people_per_km_sq = as.double(population_without_inst_ship/area_km))
#
# #plot it out
#
# ward_covariates %>%
# ggplot() +
# geom_sf(aes(fill=people_per_km_sq)) +
# facet_wrap(year~., ncol=7) +
# scale_fill_viridis_c(option="A") +
# theme(legend.position = "bottom",
# axis.text.x = element_text(angle = 45, hjust=1))
#
# ggsave(here("figures/ward_pop_density.png"), width=10)
#
# write_rds(ward_covariates, here("populations/ward_covariates.rds"))
Now divisions first
(stopped working, need to fix)
# division_covariates <- glasgow_divisions_1951 %>%
# left_join(division_pops) %>%
# mutate(people_per_km_sq = as.double(total_population/area_km))
#
# #plot it out
#
# division_covariates %>%
# ggplot() +
# geom_sf(aes(fill=people_per_km_sq)) +
# geom_sf_label(aes(label = division), size=3, fill=NA, label.size = NA, colour="black", family = "Segoe UI") +
# facet_wrap(year~., ncol=7) +
# scale_fill_viridis_c(option="G") +
# theme(legend.position = "bottom",
# axis.text.x = element_text(angle = 45, hjust=1))
#
# ggsave(here("figures/division_pop_density.png"), width=10)
#
# write_rds(division_covariates, here("populations/division_covariates.rds"))
age_sex <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "age_sex_population") %>%
pivot_longer(cols = c(male, female),
names_to = "sex")
#collapse down to smaller age groups to be manageable
age_sex <- age_sex %>%
ungroup() %>%
mutate(age = case_when(age == "0 to 4" ~ "00 to 04",
age == "5 to 9" ~ "05 to 14",
age == "10 to 14" ~ "05 to 14",
age == "15 to 19" ~ "15 to 24",
age == "20 to 24" ~ "15 to 24",
age == "25 to 29" ~ "25 to 34",
age == "30 to 34" ~ "25 to 34",
age == "35 to 39" ~ "35 to 44",
age == "40 to 44" ~ "35 to 44",
age == "45 to 49" ~ "45 to 59",
age == "50 to 54" ~ "45 to 59",
age == "55 to 59" ~ "45 to 59",
TRUE ~ "60 & up")) %>%
group_by(year, age, sex) %>%
mutate(value = sum(value)) %>%
ungroup()
m_age_sex <- lm(value ~ splines::ns(year, knots = 3)*age*sex, data = age_sex)
summary(m_age_sex)
Warning: essentially perfect fit: summary may be unreliable
Call:
lm(formula = value ~ splines::ns(year, knots = 3) * age * sex,
data = age_sex)
Residuals:
Min 1Q Median 3Q Max
-2.107e-10 -7.560e-13 0.000e+00 0.000e+00 2.107e-10
Coefficients: (14 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.222e+04 3.820e-10 1.367e+14 <2e-16 ***
splines::ns(year, knots = 3)1 -8.043e+03 7.621e-10 -1.055e+13 <2e-16 ***
splines::ns(year, knots = 3)2 NA NA NA NA
age05 to 14 3.669e+04 4.679e-10 7.843e+13 <2e-16 ***
age15 to 24 -3.893e+03 4.679e-10 -8.320e+12 <2e-16 ***
age25 to 34 -3.996e+04 4.679e-10 -8.540e+13 <2e-16 ***
age35 to 44 -4.230e+04 4.679e-10 -9.040e+13 <2e-16 ***
age45 to 59 5.459e+04 4.411e-10 1.238e+14 <2e-16 ***
age60 & up 7.533e+04 4.126e-10 1.826e+14 <2e-16 ***
sexmale 3.374e+03 5.402e-10 6.244e+12 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14 -1.863e+03 9.334e-10 -1.996e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14 NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24 7.533e+04 9.334e-10 8.070e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24 NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34 1.325e+05 9.334e-10 1.420e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34 NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44 1.380e+05 9.334e-10 1.479e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44 NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59 3.474e+03 8.800e-10 3.948e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59 NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up -8.453e+04 8.232e-10 -1.027e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up NA NA NA NA
splines::ns(year, knots = 3)1:sexmale -1.994e+03 1.078e-09 -1.850e+12 <2e-16 ***
splines::ns(year, knots = 3)2:sexmale NA NA NA NA
age05 to 14:sexmale 1.053e+04 6.617e-10 1.592e+13 <2e-16 ***
age15 to 24:sexmale 2.352e+04 6.617e-10 3.555e+13 <2e-16 ***
age25 to 34:sexmale 1.355e+04 6.617e-10 2.047e+13 <2e-16 ***
age35 to 44:sexmale -1.727e+03 6.617e-10 -2.611e+12 <2e-16 ***
age45 to 59:sexmale 2.774e+03 6.238e-10 4.446e+12 <2e-16 ***
age60 & up:sexmale -7.761e+04 5.835e-10 -1.330e+14 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14:sexmale -2.049e+04 1.320e-09 -1.552e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24:sexmale -6.780e+04 1.320e-09 -5.136e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34:sexmale -3.804e+04 1.320e-09 -2.882e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44:sexmale -1.171e+04 1.320e-09 -8.874e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59:sexmale -3.473e+04 1.244e-09 -2.791e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up:sexmale 1.056e+05 1.164e-09 9.071e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up:sexmale NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.755e-11 on 44 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 1.714e+29 on 27 and 44 DF, p-value: < 2.2e-16
age_levels <- age_sex %>% select(age) %>% distinct() %>% pull()
age_sex_nd <-
crossing(
age=age_levels,
sex=c("male", "female"),
year = 1950:1963
)
pred_pops <- age_sex_nd %>% modelr::add_predictions(m_age_sex)
Warning: prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
pred_pops %>%
ggplot(aes(x=year, y=pred, colour=age)) +
geom_line() +
geom_point() +
facet_grid(sex~.) +
scale_y_continuous(labels = comma, limits = c(0, 125000))
#How well do they match up with our overall populations?
pred_pops %>%
group_by(year) %>%
summarise(sum_pred_pop = sum(pred)) %>%
right_join(overall_pops) %>%
select(year, sum_pred_pop, population_without_inst_ship, total_population) %>%
pivot_longer(cols = c(sum_pred_pop, population_without_inst_ship, total_population)) %>%
ggplot(aes(x=year, y=value, colour=name)) +
geom_point() +
scale_y_continuous(labels = comma, limits = c(800000, 1250000))
Joining with `by = join_by(year)`
pred_pops %>%
group_by(year, sex) %>%
summarise(sum = sum(pred)) %>%
group_by(year) %>%
mutate(sex_ratio = first(sum)/last(sum))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
What percentage of adults (15+) participated in the intervention in 1957?
pred_pops %>%
ungroup() %>%
filter(year==1957) %>%
filter(age != "00 to 04",
age != "05 to 14") %>%
summarise(total_pop = sum(pred)) %>%
mutate(cxr_screened = 622349) %>%
mutate(pct_pop_cxr_screened = percent(cxr_screened/total_pop))
pred_pops %>%
ungroup() %>%
filter(year==1957) %>%
filter(age != "00 to 04",
age != "05 to 14") %>%
summarise(total_pop = sum(pred), .by=sex) %>%
mutate(cxr_screened = c(340474, 281875)) %>%
mutate(pct_pop_cxr_screened = percent(cxr_screened/total_pop))
NA
NA
Population pyramids
# Axis labeller for the population pyramids: format the magnitude of a count
# with thousands separators, so the mirrored (negative) female side of the
# pyramid displays positive numbers.
label_abs <- function(x) {
  magnitude <- abs(x)
  comma(magnitude)
}
pred_pops %>%
ungroup() %>%
group_by(year) %>%
mutate(year_pop = sum(pred),
age_sex_pct = percent(pred/year_pop, accuracy=0.1)) %>%
mutate(sex = case_when(sex=="male" ~ "Male",
sex=="female" ~ "Female")) %>%
ggplot(
aes(x = age, fill = sex,
y = ifelse(test = sex == "Female",yes = -pred, no = pred))) +
geom_bar(stat = "identity") +
geom_text(aes(label = age_sex_pct),
position= position_stack(vjust=0.5), colour="white", size=2.5) +
facet_wrap(year~., ncol=7) +
coord_flip() +
scale_y_continuous(labels = label_abs) +
scale_fill_manual(values = c("mediumseagreen", "purple"), name="") +
theme_ggdist() +
theme(axis.text.x = element_text(angle=90, hjust = 1, vjust=0.5),
legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="", y="")
ggsave(here("figures/s3.png"), width=10)
Saving 10 x 4.51 in image
Not perfect, but reasonably good. But ahhhhh… the age groups don’t align with the case notification age groups! Come back to think about this later.
Import the tuberculosis cases dataset
Overall, by year.
cases_by_year <- read_xlsx("2023-11-28_glasgow-acf.xlsx", sheet = "by_year")
cases_by_year%>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
cases_by_year <- cases_by_year %>%
mutate(year2 = year+0.5)
Plot the overall number of case notified per year, by pulmonary and extra pulmonary classification.
cases_by_year %>%
select(-total_notifications, -year) %>%
pivot_longer(cols = c(pulmonary_notifications, `non-pulmonary_notifications`)) %>%
mutate(name = case_when(name == "pulmonary_notifications" ~ "Pulmonary TB",
name == "non-pulmonary_notifications" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
Read in the datasets and merge together.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
ward_sheets <- enframe(all_sheets) %>%
filter(grepl("by_ward", value)) %>%
pull(value)
cases_by_ward_sex_year <- map_df(ward_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_ward_sex_year %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Aggregate together to get cases by division
cases_by_division <- cases_by_ward_sex_year %>%
left_join(ward_lookup) %>%
group_by(division, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE))
Joining with `by = join_by(ward)``summarise()` has grouped output by 'division', 'year'. You can override using the `.groups` argument.
#shift year to midpoint
cases_by_division <- cases_by_division %>%
mutate(year2 = year+0.5) %>%
ungroup()
cases_by_division %>%
select(-year2) %>%
select(year, everything()) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
cases_by_division %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
cases_by_ward <- cases_by_ward_sex_year %>%
group_by(ward, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
ungroup()
`summarise()` has grouped output by 'ward', 'year'. You can override using the `.groups` argument.
cases_by_ward %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
select(year, everything()) %>%
datatable()
# Shift each year to its midpoint for plotting on a continuous axis
cases_by_ward <- mutate(cases_by_ward, year2 = year + 0.5)
# Stacked-area chart of TB notifications per Ward, by disease type.
# Fix: theme_ggdist() added before theme() for consistency with the
# sibling Division plot and the other figures — it was missing here,
# leaving this one figure styled differently.
cases_by_ward %>%
  mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
                             tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
  ggplot() +
  geom_area(aes(y = cases, x = year2, group = tb_type, fill = tb_type), alpha = 0.8) +
  geom_vline(aes(xintercept = acf_start), linetype = 3) +
  geom_vline(aes(xintercept = acf_end), linetype = 3) +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = year_labels,
                     breaks = year_labels,
                     guide = guide_axis(angle = 90)) +
  facet_wrap(ward ~ ., scales = "free_y") +
  scale_fill_brewer(palette = "Set1", name = "") +
  labs(
    title = "Glasgow Corporation: Tuberculosis notifications by Ward",
    subtitle = "1950 to 1963, by TB disease classification",
    x = "Year",
    y = "Number of cases",
    caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
  ) +
  theme_ggdist() +
  theme(legend.position = "bottom")
NA
NA
As we don’t have denominators, we will just model the change in counts.
#list all the sheet names in the workbook
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the age/sex sheets (the original comment said "ward sheets", but the
#filter below keeps sheets whose names contain "by_age_sex")
age_sex_sheets <- enframe(all_sheets) %>%
filter(grepl("by_age_sex", value)) %>%
pull(value)
#read each age/sex sheet and row-bind them into a single tibble
cases_by_age_sex <- map_df(age_sex_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
#display, with thousands separators on every numeric column except year
cases_by_age_sex %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
Now calculate case notification rates per 100,000 population
Merge the notification and population denominator datasets together.
Here we need to include the whole population (with shipping and institutions) as they are included in the notifications.
# Join population denominators to the annual notification counts.
# Fix: join keys made explicit (year and its midpoint year2, as confirmed
# by the previous implicit-join message) to silence the message and guard
# against accidental key drift.
overall_inc <- overall_pops %>%
  left_join(cases_by_year, by = c("year", "year2"))
Joining with `by = join_by(year, year2)`
# Case notification rates per 100,000 population, by disease type
overall_inc <- overall_inc %>%
  mutate(
    inc_pulm_100k = 100000 * pulmonary_notifications / total_population,
    inc_ep_100k   = 100000 * `non-pulmonary_notifications` / total_population,
    inc_100k      = 100000 * total_notifications / total_population
  )
# Round the rate columns for display.
# Fix: `funs()` was deprecated in dplyr 0.8.0 (see the warning emitted
# below); `mutate_at()` + `funs()` is replaced by the modern `across()`.
overall_inc %>%
  select(year, inc_100k, inc_pulm_100k, inc_ep_100k) %>%
  mutate(across(c(inc_100k, inc_pulm_100k, inc_ep_100k), round)) %>%
  datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
# Stacked-area chart of the overall notification rate, by disease type
overall_inc %>%
  select(year2, inc_pulm_100k, inc_ep_100k) %>%
  pivot_longer(cols = c(inc_pulm_100k, inc_ep_100k)) %>%
  mutate(name = case_when(name == "inc_pulm_100k" ~ "Pulmonary TB",
                          name == "inc_ep_100k" ~ "Extra-pulmonary TB")) %>%
  ggplot() +
  geom_area(aes(x = year2, y = value, group = name, fill = name),
            alpha = 0.5) +
  geom_vline(aes(xintercept = acf_start), linetype = "dotted") +
  geom_vline(aes(xintercept = acf_end), linetype = "dotted") +
  scale_x_continuous(breaks = year_labels, labels = year_labels) +
  scale_y_continuous(labels = comma) +
  scale_fill_brewer(palette = "Set1", name = "") +
  labs(
    title = "Glasgow Corporation: Tuberculosis case notification rate",
    subtitle = "1950 to 1963, by TB disease classification",
    x = "Year",
    y = "Case notification rate (per 100,000)",
    caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
  ) +
  theme_ggdist() +
  theme(legend.position = "bottom")
NA
NA
NA
# Join division denominators to division-level case counts.
# Fix: join keys made explicit (confirmed by the implicit-join message).
division_inc <- division_pops %>%
  left_join(cases_by_division, by = c("division", "year"))
Joining with `by = join_by(division, year)`
# Division-level case notification rate per 100,000
division_inc <- mutate(division_inc,
                       inc_100k = 100000 * cases / total_population)
# Rounded division rates for display.
# Fix: deprecated `funs()` / `mutate_at()` replaced with `across()`
# (silences the dplyr 0.8.0 deprecation warning seen below).
division_inc %>%
  select(year, division, tb_type, inc_100k) %>%
  mutate(across(inc_100k, round)) %>%
  datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
# Stacked-area chart of notification rates per Division, by disease type
division_inc %>%
  mutate(tb_type = case_when(
    tb_type == "Pulmonary" ~ "Pulmonary TB",
    tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB"
  )) %>%
  ggplot() +
  geom_area(aes(x = year2, y = inc_100k, group = tb_type, fill = tb_type),
            alpha = 0.5) +
  geom_vline(aes(xintercept = acf_start), linetype = "dotted") +
  geom_vline(aes(xintercept = acf_end), linetype = "dotted") +
  scale_x_continuous(breaks = year_labels,
                     labels = year_labels,
                     guide = guide_axis(angle = 90)) +
  scale_y_continuous(labels = comma) +
  scale_fill_brewer(palette = "Set1", name = "") +
  facet_wrap(division ~ .) +
  labs(
    title = "Glasgow Corporation: Tuberculosis case notification rate, by Division",
    subtitle = "1950 to 1963, by TB disease classification",
    x = "Year",
    y = "Case notification rate (per 100,000)",
    caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
  ) +
  theme_ggdist() +
  theme(legend.position = "bottom")
NA
NA
NA
Here we will filter out the institutions and harbour from the denominators, as we don’t have reliable population denominators for them.
# Join ward denominators (institutions/shipping excluded, per the note
# above) to ward-level case counts.
# Fix: join keys made explicit (confirmed by the implicit-join message).
ward_inc <- ward_pops %>%
  left_join(cases_by_ward, by = c("ward", "year", "year2"))
Joining with `by = join_by(ward, year, year2)`
# Ward-level rate per 100,000, using the denominator that excludes
# institutions and shipping
ward_inc <- mutate(ward_inc,
                   inc_100k = 100000 * cases / population_without_inst_ship)
# Rounded ward rates for display.
# Fix: deprecated `funs()` / `mutate_at()` replaced with `across()`.
ward_inc %>%
  select(year, ward, tb_type, inc_100k) %>%
  mutate(across(inc_100k, round)) %>%
  datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
# Stacked-area chart of notification rates per Ward, by disease type.
# Fix: theme_ggdist() added for consistency with the other figures (it
# was missing here). NOTE(review): y label says "Incidence" while the
# sibling plots say "Case notification rate" — confirm which is intended.
ward_inc %>%
  mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
                             tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
  ggplot() +
  geom_area(aes(y = inc_100k, x = year2, group = tb_type, fill = tb_type), alpha = 0.5) +
  geom_vline(aes(xintercept = acf_start), linetype = 3) +
  geom_vline(aes(xintercept = acf_end), linetype = 3) +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = year_labels,
                     breaks = year_labels,
                     guide = guide_axis(angle = 90)) +
  facet_wrap(ward ~ .) +
  scale_fill_brewer(palette = "Set1", name = "") +
  labs(
    title = "Glasgow Corporation: Tuberculosis case notification rate, by Ward",
    subtitle = "1950 to 1963, by TB disease classification",
    x = "Year",
    y = "Incidence (per 100,000)",
    caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
  ) +
  theme_ggdist() +
  theme(legend.position = "bottom")
NA
NA
NA
NA
On a map
# Choropleth of pulmonary TB notification rates by ward, one panel per year.
# Fix: join keys made explicit (confirmed by the implicit-join message).
st_as_sf(left_join(ward_inc, glasgow_wards_1951,
                   by = c("division", "ward", "ward_number"))) %>%
  filter(tb_type == "Pulmonary") %>%
  ggplot() +
  geom_sf(aes(fill = inc_100k)) +
  facet_wrap(year ~ ., ncol = 7) +
  scale_fill_viridis_c(name = "Case notification rate (per 100,000)",
                       option = "A") +
  theme_ggdist() +
  theme(legend.position = "top",
        legend.key.width = unit(2, "cm"),
        panel.border = element_rect(colour = "grey78", fill = NA)) +
  guides(fill = guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
Import the TB mortality data.
First, overall deaths. Note that in the original reports, we have a pulmonary TB death rate per million for all years, and numbers of pulmonary TB deaths for each year apart from 1950.
# Identify the mortality sheets by name and read them into one tibble
deaths_sheets <- all_sheets %>%
  enframe() %>%
  filter(grepl("deaths", value)) %>%
  pull(value)
overall_deaths <- map_df(deaths_sheets,
                         ~ read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = .))
# Display with thousands separators (year left unformatted)
overall_deaths %>%
  mutate(across(where(is.numeric) & !year, comma)) %>%
  datatable()
NA
NA
NA
Plot the raw numbers of pulmonary deaths
# Raw numbers of pulmonary TB deaths per year
# (per the note above, no count is available for 1950 — rate only)
overall_deaths %>%
  ggplot(aes(x = year, y = pulmonary_deaths)) +
  geom_line(colour = "#DE0D92") +
  geom_point(colour = "#DE0D92") +
  geom_vline(aes(xintercept = acf_start), linetype = "dotted") +
  geom_vline(aes(xintercept = acf_end), linetype = "dotted") +
  labs(
    y = "Pulmonary TB deaths per year",
    x = "Year",
    title = "Numbers of pulmonary TB deaths",
    subtitle = "Glasgow, 1950-1963",
    caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: no data for 1950"
  ) +
  theme_ggdist() +
  theme(panel.border = element_rect(colour = "grey78", fill = NA))
NA
NA
Now the incidence of pulmonary TB death
# Pulmonary TB death rate per 100,000, saved as supplementary figure s7.
# (ggsave height is unspecified, so the current device height is used.)
overall_deaths %>%
  ggplot(aes(x = year, y = pulmonary_death_rate_per_100k)) +
  geom_line(colour = "#4D6CFA") +
  geom_point(colour = "#4D6CFA") +
  geom_vline(aes(xintercept = acf_start), linetype = "dotted") +
  geom_vline(aes(xintercept = acf_end), linetype = "dotted") +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(breaks = year_labels, labels = year_labels) +
  labs(
    y = "Annual incidence of death (per 100,000)",
    x = "Year",
    caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
  ) +
  theme_ggdist() +
  theme(panel.border = element_rect(colour = "grey78", fill = NA))
ggsave(here("figures/s7.png"), width = 10)
Saving 10 x 4.51 in image
Make Table 1 here, and save for publication.
# Assemble Table 1: population, notifications + rates, deaths + death rate.
# Fixes: both join keys made explicit (`year`, confirmed by the messages
# below). NOTE(review): the table is printed but never assigned or written
# to disk, despite "save for publication" in the text — confirm where the
# save happens.
overall_pops %>%
  select(year, total_population) %>%
  left_join(overall_inc %>%
              select(year,
                     pulmonary_notifications, inc_pulm_100k,
                     `non-pulmonary_notifications`, inc_ep_100k,
                     total_notifications, inc_100k),
            by = "year") %>%
  left_join(overall_deaths %>%
              select(year,
                     pulmonary_deaths, pulmonary_death_rate_per_100k),
            by = "year") %>%
  mutate(across(where(is.numeric) & !year, ~round(., digits = 1))) %>%
  mutate(across(where(is.numeric) & !year, comma))
Joining with `by = join_by(year)`
Joining with `by = join_by(year)`
Prepare the datasets for modelling
# Pulmonary modelling dataset: label each year with its ACF period
# (pre / campaign year / post) and add a within-ward time index y_num
# (1 = first year observed for that ward).
mdata <- ward_inc %>%
  filter(tb_type == "Pulmonary") %>%
  mutate(acf_period = case_when(
    year %in% 1950:1956 ~ "a. pre-acf",
    year == 1957 ~ "b. acf",
    year %in% 1958:1963 ~ "c. post-acf"
  )) %>%
  group_by(ward) %>%
  mutate(y_num = row_number()) %>%
  ungroup()
# Same construction as `mdata`, for the extra-pulmonary cases
mdata_extrapulmonary <- ward_inc %>%
  filter(tb_type == "Non-Pulmonary") %>%
  mutate(acf_period = case_when(
    year %in% 1950:1956 ~ "a. pre-acf",
    year == 1957 ~ "b. acf",
    year %in% 1958:1963 ~ "c. post-acf"
  )) %>%
  group_by(ward) %>%
  mutate(y_num = row_number()) %>%
  ungroup()
#scaffold for overall (city-wide) predictions: sum ward counts and
#denominators per year, then attach extra-pulmonary totals.
#Fixes: `.groups = "drop"` replaces the implicit-grouping message plus
#the separate ungroup(); the join key ("year") is now explicit
#(both confirmed by the messages below).
overall_scaffold <- mdata %>%
  select(year, year2, y_num, acf_period, population_without_inst_ship, ward, cases) %>%
  group_by(year, year2, y_num, acf_period) %>%
  summarise(population_without_inst_ship = sum(population_without_inst_ship),
            cases = sum(cases),
            .groups = "drop") %>%
  mutate(inc_100k = cases / population_without_inst_ship * 100000) %>%
  left_join(mdata_extrapulmonary %>%
              group_by(year) %>%
              summarise(cases_extrapulmonary = sum(cases), .groups = "drop"),
            by = "year") %>%
  mutate(inc_100k_extrapulmonary = cases_extrapulmonary / population_without_inst_ship * 100000)
`summarise()` has grouped output by 'year', 'year2', 'y_num'. You can override using the `.groups` argument.
Joining with `by = join_by(year)`
This models the case notification rate over time, with a step change for the intervention, and slope change after the intervention.
Work on the priors a bit. We will build up from less complex to more complex.
at the intercept, we expect somewhere around 2500. We will set the standard deviation to both 0.5 and 1 to check what it looks like
#Visualise two candidate intercept priors on the response (count) scale.
#log(2000) = 7.600902 — the original comment said log(2500), but
#exp(7.600902) is 2000, matching the axis labels below.
#NOTE(review): the text above says "around 2500"; confirm intended centre.
c(prior(lognormal(7.600902, 0.5)),
prior(lognormal(7.600902, 1))) %>%
parse_dist() %>%
ggplot(aes(y = prior, dist = .dist, args = .args)) +
stat_halfeye(.width = c(.5, .95)) +
scale_y_discrete(NULL, labels = str_c("lognormal(log(2000), ", c(0.5, 1), ")"),
expand = expansion(add = 0.1)) +
xlab(expression(exp(italic(p)(beta[0])))) +
coord_cartesian(xlim = c(0,15000))
# Visualise the candidate gamma(1, 0.01) prior for the negative-binomial
# shape parameter
prior(gamma(1, 0.01)) %>%
  parse_dist() %>%
  ggplot(aes(dist = .dist, args = .args, y = prior)) +
  stat_halfeye(.width = c(0.5, 0.95))
#now fit to a model, and plot some prior realisations
#Intercept-only negative-binomial model; sample_prior = "only" draws from
#the priors alone (the data are used only to set up the design matrix).
#The 0 + Intercept trick makes the prior apply to the raw intercept rather
#than the centred one brms uses by default.
#NOTE(review): the prior centre is log(2000) while the prose and the
#earlier prior exploration mention ~2500 — confirm which is intended.
m_prior1 <- brm(
cases ~ 0 + Intercept,
family = negbinomial(),
data = overall_scaffold,
sample_prior = "only",
prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
prior(gamma(1, 0.01), class = shape)
)
Compiling Stan program...
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 2.5e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.25 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.009 seconds (Warm-up)
Chain 1: 0.006 seconds (Sampling)
Chain 1: 0.015 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 1e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.01 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 0.006 seconds (Warm-up)
Chain 2: 0.007 seconds (Sampling)
Chain 2: 0.013 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 0.006 seconds (Warm-up)
Chain 3: 0.006 seconds (Sampling)
Chain 3: 0.012 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 0.006 seconds (Warm-up)
Chain 4: 0.006 seconds (Sampling)
Chain 4: 0.012 seconds (Total)
Chain 4:
# Prior predictive distribution of the intercept-only model,
# shown on a log10 axis
add_epred_draws(object = m_prior1,
                newdata = tibble(intercept = 1)) %>%
  ggplot(aes(x = intercept, y = .epred)) +
  stat_halfeye() +
  scale_y_log10(labels = comma)
NA
NA
Now try to add in a term for the effect of y_num. We anticipate that the number of cases will decline by about 1-5% per year. However, as we are pretty uncertain about this, we will just encode a weakly regularising prior to restrict the year effect to sensible ranges.
#Prior-only model adding a linear time trend (y_num) on the log scale.
#NOTE(review): normal(0, 0.01) puts ~95% of the yearly multiplicative
#change within about +/-2%, while the text above anticipates declines of
#1-5% per year — confirm the sd is as intended.
m_prior2 <- brm(
cases ~ 0 + Intercept + y_num,
family = negbinomial(),
data = overall_scaffold,
sample_prior = "only",
prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
prior(gamma(1, 0.01), class = shape) +
prior(normal(0, 0.01), class = b, coef = y_num)
)
Compiling Stan program...
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 2.2e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.22 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.018 seconds (Warm-up)
Chain 1: 0.007 seconds (Sampling)
Chain 1: 0.025 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 1e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.01 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 0.017 seconds (Warm-up)
Chain 2: 0.008 seconds (Sampling)
Chain 2: 0.025 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 2e-06 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 0.016 seconds (Warm-up)
Chain 3: 0.007 seconds (Sampling)
Chain 3: 0.023 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 2e-06 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 0.018 seconds (Warm-up)
Chain 4: 0.007 seconds (Sampling)
Chain 4: 0.025 seconds (Total)
Chain 4:
#Prior predictive check for the time-trend model, by year.
#Fix: `label =` relied on partial argument matching; spelled out as
#`labels =`, consistent with the other scale_y_log10 calls in this file.
add_epred_draws(object = m_prior2,
                newdata = overall_scaffold) %>%
  ggplot(aes(x = year, y = .epred)) +
  stat_halfeye() +
  scale_y_log10(labels = comma)
Now we want to add in a prior for the effect of the ACF intervention. We anticipate the peak to be anywhere between no effect and a tripling.
#Prior-only model adding a step change for the ACF period.
#The catch-all prior(normal(0, 0.001), class = b) applies to the remaining
#acf_period coefficients (Intercept and y_num have their own priors).
#NOTE(review): on the log scale sd = 0.001 confines the step to roughly
#+/-0.2%, yet the text above anticipates up to a tripling (log(3) ~ 1.1).
#This looks several orders of magnitude too tight — confirm (perhaps 0.5
#or 1 was intended).
m_prior3 <- brm(
cases ~ 0 + Intercept + y_num + acf_period,
family = negbinomial(),
data = overall_scaffold,
sample_prior = "only",
prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
prior(gamma(1, 0.01), class = shape) +
prior(normal(0, 0.01), class = b, coef = y_num) +
prior(normal(0, 0.001), class = b)
)
Compiling Stan program...
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 2.2e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.22 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.061 seconds (Warm-up)
Chain 1: 0.014 seconds (Sampling)
Chain 1: 0.075 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 1e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.01 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 0.053 seconds (Warm-up)
Chain 2: 0.013 seconds (Sampling)
Chain 2: 0.066 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 1e-06 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.01 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 0.042 seconds (Warm-up)
Chain 3: 0.014 seconds (Sampling)
Chain 3: 0.056 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 1e-06 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.01 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 0.057 seconds (Warm-up)
Chain 4: 0.013 seconds (Sampling)
Chain 4: 0.07 seconds (Total)
Chain 4:
# Prior predictive check for the step-change model, by year
add_epred_draws(object = m_prior3, newdata = overall_scaffold) %>%
  ggplot(aes(x = year, y = .epred)) +
  stat_halfeye() +
  scale_y_log10(labels = comma)
NA
NA
NA
Now we look and see what it looks like with the interactions
#Prior-only model with the full interaction: separate slopes per ACF period.
#A single normal(0, 0.01) prior now covers all non-intercept coefficients.
#NOTE(review): the intercept prior here is normal(log(2500), 1), wider and
#centred differently from m_prior1-3 (normal(log(2000), 0.5)) — confirm
#this change is deliberate.
m_prior4 <- brm(
cases ~ 0 + Intercept + y_num + acf_period + y_num:acf_period,
family = negbinomial(),
data = overall_scaffold,
sample_prior = "only",
prior = prior(normal(log(2500), 1), class = b, coef = Intercept) +
prior(gamma(1, 0.01), class = shape) +
prior(normal(0, 0.01), class = b)
)
Compiling Stan program...
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 2.2e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.22 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.026 seconds (Warm-up)
Chain 1: 0.011 seconds (Sampling)
Chain 1: 0.037 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 1e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.01 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 0.024 seconds (Warm-up)
Chain 2: 0.01 seconds (Sampling)
Chain 2: 0.034 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 1e-06 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.01 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 0.023 seconds (Warm-up)
Chain 3: 0.01 seconds (Sampling)
Chain 3: 0.033 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 1e-06 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.01 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 0.021 seconds (Warm-up)
Chain 4: 0.01 seconds (Sampling)
Chain 4: 0.031 seconds (Total)
Chain 4:
#Prior predictive check for the interaction model, by year.
#Fix: `label =` relied on partial argument matching; spelled out as
#`labels =`, consistent with the other scale_y_log10 calls in this file.
add_epred_draws(object = m_prior4,
                newdata = overall_scaffold) %>%
  ggplot(aes(x = year, y = .epred)) +
  stat_halfeye() +
  scale_y_log10(labels = comma)
NA
NA
NA
Now try adding in the random intercepts
# Visualise two candidate ward-level intercept priors on the count scale;
# log(50) = 3.912023, i.e. centred on ~50 cases per ward
c(prior(lognormal(3.912023, 0.5)),
  prior(lognormal(3.912023, 1))) %>%
  parse_dist() %>%
  ggplot(aes(dist = .dist, args = .args, y = prior)) +
  stat_halfeye(.width = c(.5, .95)) +
  scale_y_discrete(NULL,
                   labels = str_c("lognormal(log(50), ", c(0.5, 1), ")"),
                   expand = expansion(add = 0.1)) +
  xlab(expression(exp(italic(p)(beta[0])))) +
  coord_cartesian(xlim = c(0, 400))
#Prior-only model with random intercepts by ward.
#Population-level intercept prior normal(log(50), 1) reflects ward-level
#case counts (see the prior visualisation above); exponential(1) on the
#between-ward sd.
m_prior5 <- brm(
cases ~ y_num + acf_period + y_num:acf_period + ( 1 | ward),
family = negbinomial(),
data = mdata,
sample_prior = "only",
prior = prior(normal(log(50), 1), class = Intercept) +
prior(gamma(1, 0.01), class = shape) +
prior(normal(0, 0.01), class = b) +
prior(exponential(1), class=sd)
)
Compiling Stan program...
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 6.1e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.61 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.148 seconds (Warm-up)
Chain 1: 0.04 seconds (Sampling)
Chain 1: 0.188 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 3e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.03 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 0.143 seconds (Warm-up)
Chain 2: 0.042 seconds (Sampling)
Chain 2: 0.185 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 4e-06 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 0.149 seconds (Warm-up)
Chain 3: 0.035 seconds (Sampling)
Chain 3: 0.184 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 4e-06 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 0.151 seconds (Warm-up)
Chain 4: 0.028 seconds (Sampling)
Chain 4: 0.179 seconds (Total)
Chain 4:
#Prior-predictive distribution of expected counts by year, population
#level only (re_formula = NA drops the ward random effects)
mdata %>%
  add_epred_draws(object = m_prior5, re_formula = NA) %>%
  ggplot(aes(x = year, y = .epred)) +
  stat_halfeye() +
  scale_y_log10(label = comma)
#As above, but shown per ward panel (the predictions themselves remain
#population-level because re_formula = NA)
mdata %>%
  add_epred_draws(object = m_prior5, re_formula = NA) %>%
  ggplot(aes(x = year, y = .epred)) +
  stat_halfeye() +
  scale_y_log10(label = comma) +
  facet_wrap(ward ~ .)
And add in the random slopes
#Prior-predictive check adding correlated random slopes for year, ACF
#period, and their interaction at ward level.
#NOTE(review): no explicit Intercept prior is set here, so brms' default
#intercept prior is used -- confirm this is intended.
m_prior6 <- brm(
cases ~ 1 + y_num + acf_period + y_num:acf_period + (1 + y_num*acf_period | ward),
family = negbinomial(),
data = mdata,
sample_prior = "only",
prior = prior(gamma(1, 0.01), class = shape) +
prior(normal(0, 0.1), class = b) +
prior(exponential(1), class=sd) +
#lkj(2) mildly favours weak random-effect correlations
prior(lkj(2), class=cor)
)
Compiling Stan program...
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 7.7e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.77 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.209 seconds (Warm-up)
Chain 1: 0.173 seconds (Sampling)
Chain 1: 0.382 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 1e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 0.202 seconds (Warm-up)
Chain 2: 0.178 seconds (Sampling)
Chain 2: 0.38 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 1e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 0.203 seconds (Warm-up)
Chain 3: 0.171 seconds (Sampling)
Chain 3: 0.374 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 1e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 0.213 seconds (Warm-up)
Chain 4: 0.171 seconds (Sampling)
Chain 4: 0.384 seconds (Total)
Chain 4:
#Same model reparameterised with 0 + Intercept, so the intercept sits in
#class b and can be given the interpretable normal(log(50), 1) prior
#directly (with an implicit formula intercept, brms instead places the
#Intercept prior on the mean-centred scale).
m_prior6 <- brm(
cases ~ 0 + Intercept + y_num + acf_period + y_num:acf_period + ( y_num*acf_period | ward),
family = negbinomial(),
data = mdata,
sample_prior = "only",
prior = prior(normal(log(50), 1), class = b, coef = Intercept) +
prior(gamma(1, 0.01), class = shape) +
prior(normal(0, 0.01), class = b) +
#exponential(100) (mean 0.01) is a much tighter random-effect SD prior
#than the exponential(1) used elsewhere in this script
prior(exponential(100), class=sd) +
prior(lkj(2), class=cor)
)
Compiling Stan program...
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 6.4e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.64 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.545 seconds (Warm-up)
Chain 1: 0.174 seconds (Sampling)
Chain 1: 0.719 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 9e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 0.541 seconds (Warm-up)
Chain 2: 0.168 seconds (Sampling)
Chain 2: 0.709 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 9e-06 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 0.557 seconds (Warm-up)
Chain 3: 0.169 seconds (Sampling)
Chain 3: 0.726 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 1e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 0.571 seconds (Warm-up)
Chain 4: 0.169 seconds (Sampling)
Chain 4: 0.74 seconds (Total)
Chain 4:
#Prior-predictive expected counts from the random-slopes model,
#population level only
mdata %>%
  add_epred_draws(object = m_prior6, re_formula = NA) %>%
  ggplot(aes(x = year, y = .epred)) +
  stat_halfeye() +
  scale_y_log10(label = comma)
#Ward-level prior predictions: include the random intercept plus the year
#and ACF-period slopes when generating expected values, one panel per ward
mdata %>%
  add_epred_draws(object = m_prior6,
                  re_formula = ~( 1 + y_num + acf_period | ward)) %>%
  ggplot(aes(x = year, y = .epred)) +
  stat_halfeye() +
  scale_y_log10(label = comma) +
  facet_wrap(ward ~ .)
#Counterfactual plots from the prior-predictive model: city level, then
#per ward including the ward random effects.
plot_counterfactual(model_data = overall_scaffold, model = m_prior6, outcome = inc_100k,
                    population_denominator = population_without_inst_ship, re_formula = NA)
#A stray bare `ward` argument (previously after grouping_var) has been
#removed: it was silently absorbed by `...` and, if ever evaluated, would
#have errored on the undefined symbol.
plot_counterfactual(model_data = mdata, model = m_prior6, outcome = inc_100k,
                    population_denominator = population_without_inst_ship, grouping_var = ward,
                    re_formula = ~( 1 + y_num + acf_period | ward))
Issue here is the non-centred parameterisation of the intercept prior… Feel like this is a more interpretable way to set priors… but will revert to centred parameterisation for the meantime.
Look at the mean and variance of counts (counts of pulmonary notifications are what we are predicting)
#Mean of pulmonary notification counts per ward-year
mean(mdata$cases)
[1] 48.32819
#Variance of counts per ward-year; it far exceeds the mean (~916 vs ~48),
#i.e. the counts are over-dispersed relative to a Poisson distribution
var(mdata$cases)
[1] 915.5749
Quite a bit of over-dispersion here, so negative binomial distribution might be a better choice of distributional family than Poisson.
Fit the model with the data
#Fit the negative binomial model to the observed pulmonary notifications.
#log(population) enters as an offset, so coefficients are on the log-rate
#scale; correlated random intercept and slopes per ward.
m_pulmonary <- brm(
cases ~ y_num + acf_period + y_num:acf_period + (y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
#NOTE(review): shape prior is gamma(0.01, 0.01) here but gamma(1, 0.01)
#in the prior-predictive checks above -- confirm which is intended
prior = prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 0.1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(2), class=cor))
Compiling Stan program...
Start sampling
starting worker pid=54171 on localhost:11231 at 11:12:13.446
starting worker pid=54184 on localhost:11231 at 11:12:13.540
starting worker pid=54197 on localhost:11231 at 11:12:13.632
starting worker pid=54210 on localhost:11231 at 11:12:13.723
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000167 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.67 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.000174 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.74 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.00016 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.6 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000173 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.73 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 8.819 seconds (Warm-up)
Chain 2: 4.58 seconds (Sampling)
Chain 2: 13.399 seconds (Total)
Chain 2:
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 9.956 seconds (Warm-up)
Chain 1: 4.483 seconds (Sampling)
Chain 1: 14.439 seconds (Total)
Chain 1:
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 10.015 seconds (Warm-up)
Chain 3: 4.216 seconds (Sampling)
Chain 3: 14.231 seconds (Total)
Chain 3:
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 10.638 seconds (Warm-up)
Chain 4: 4.522 seconds (Sampling)
Chain 4: 15.16 seconds (Total)
Chain 4:
Warning: There were 89 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.Warning: Examine the pairs() plot to diagnose sampling problems
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
#check model diagnostics: Rhat, bulk/tail ESS, and the group- and
#population-level estimates
summary(m_pulmonary)
Warning: There were 89 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ y_num + acf_period + y_num:acf_period + (y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata (Number of observations: 518)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Group-Level Effects:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.26 0.04 0.19 0.34 1.00 1200 1623
sd(y_num) 0.02 0.01 0.01 0.03 1.01 675 702
sd(acf_periodb.acf) 0.07 0.05 0.00 0.18 1.01 935 859
sd(acf_periodc.postMacf) 0.13 0.07 0.01 0.28 1.01 475 257
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.02 1.00 1121 1892
sd(y_num:acf_periodc.postMacf) 0.01 0.01 0.00 0.03 1.01 423 227
cor(Intercept,y_num) -0.50 0.20 -0.81 -0.03 1.00 2182 1496
cor(Intercept,acf_periodb.acf) -0.27 0.33 -0.79 0.43 1.00 2352 1616
cor(y_num,acf_periodb.acf) -0.07 0.32 -0.64 0.57 1.00 2118 1756
cor(Intercept,acf_periodc.postMacf) -0.17 0.28 -0.66 0.41 1.00 3212 2728
cor(y_num,acf_periodc.postMacf) 0.14 0.30 -0.46 0.67 1.00 2555 2649
cor(acf_periodb.acf,acf_periodc.postMacf) 0.07 0.33 -0.57 0.68 1.00 1549 2372
cor(Intercept,y_num:acf_periodb.acf) -0.28 0.32 -0.80 0.42 1.00 2249 2764
cor(y_num,y_num:acf_periodb.acf) -0.06 0.31 -0.63 0.56 1.00 3118 3287
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.08 0.34 -0.69 0.59 1.00 3173 1613
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.08 0.33 -0.58 0.67 1.01 1421 1006
cor(Intercept,y_num:acf_periodc.postMacf) 0.02 0.31 -0.57 0.61 1.00 3214 2272
cor(y_num,y_num:acf_periodc.postMacf) -0.09 0.34 -0.68 0.58 1.00 911 1144
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.08 0.33 -0.57 0.68 1.00 1864 2424
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.14 0.37 -0.79 0.57 1.01 827 633
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.07 0.33 -0.58 0.70 1.00 1006 448
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -6.15 0.05 -6.24 -6.05 1.00 718 1860
y_num -0.02 0.01 -0.03 -0.01 1.00 1817 2102
acf_periodb.acf 0.01 0.10 -0.18 0.20 1.00 2039 734
acf_periodc.postMacf 0.02 0.07 -0.12 0.16 1.00 2040 1159
y_num:acf_periodb.acf 0.08 0.01 0.06 0.11 1.00 2199 2991
y_num:acf_periodc.postMacf -0.05 0.01 -0.07 -0.04 1.00 2374 1217
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 96.60 22.49 62.51 150.08 1.00 1213 979
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
#Trace and density plots for the fitted parameters
plot(m_pulmonary)
#Posterior predictive check: empirical CDF of the observed counts
#overlaid with draws from the posterior predictive distribution
pp_check(m_pulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise the posterior in graphical form
#City-level counterfactual figure: population-level predictions only
#(re_formula = NA), no grouping variable
f1b <- plot_counterfactual(
  model = m_pulmonary,
  model_data = overall_scaffold,
  population_denominator = population_without_inst_ship,
  outcome = inc_100k,
  grouping_var = NULL,
  re_formula = NA
)
f1b
Make this into a figure combined with the map of empirical data
#Choropleth of empirical ward-level pulmonary notification rates by year.
#The join keys are now stated explicitly (they match the keys dplyr
#reported for the previously implicit join), so a schema change fails
#loudly instead of silently joining on different columns.
f1a <- st_as_sf(left_join(ward_inc, glasgow_wards_1951,
                          by = join_by(division, ward, ward_number))) %>%
  filter(tb_type=="Pulmonary") %>%
  ggplot() +
  geom_sf(aes(fill=inc_100k)) +
  facet_wrap(year~., ncol = 7) +
  scale_fill_viridis_c(name="Case notification rate (per 100,000)",
                       option = "A") +
  theme_ggdist() +
  theme(panel.border = element_rect(colour = "grey78", fill=NA),
        legend.position = "top",
        #legend.key.width = unit(2, "cm"),
        legend.title.align = 0.5) +
  guides(fill=guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
#Stack the map (panel A) above the counterfactual trend (panel B), with
#automatic panel tags
(f1a / f1b) + plot_annotation(tag_levels = "A")
#No width/height supplied, so the current device size is used
ggsave(here("figures/f1.png"))
Saving 7.29 x 4.51 in image
Summary of change in notifications numerically
#Posterior summaries of the change in notifications (peak, level, slope)
#at city level, using population-level predictions only
overall_change <- summarise_change(model_data=overall_scaffold, model=m_pulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table; a lambda is used inside across() because
#passing extra arguments through `...` is deprecated in dplyr >= 1.1
overall_change %>%
  keep((names(.) %in% tokeep)) %>%
  bind_rows() %>%
  mutate(across(c(estimate:.upper), ~ number(.x, accuracy = 0.01))) %>%
  select(measure, everything()) %>%
  datatable()
NA
NA
Numbers of pulmonary TB cases averted compared to counterfactual per year.
#Cases averted versus the no-intervention counterfactual, city level
overall_pulmonary_counterf <- calculate_counterfactual(model_data = overall_scaffold, model=m_pulmonary, population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`Joining with `by = join_by(.draw)`
#Yearly counterfactual comparison table. percent() is wrapped in a lambda
#because passing extra args through across()'s `...` is deprecated in
#dplyr >= 1.1 (the number_format() calls already return functions).
overall_pulmonary_counterf$counter_post %>%
  mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
  mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
  mutate(across(c(pct_change:pct_change.upper), ~ percent(.x, accuracy = 0.1))) %>%
  datatable()
NA
NA
Total pulmonary TB cases averted between 1958 and 1963
#Total cases averted over the whole post-intervention period. percent()
#is wrapped in a lambda: passing extra args through across()'s `...` is
#deprecated in dplyr >= 1.1.
overall_pulmonary_counterf$counter_post_overall %>%
  mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
  mutate(across(c(pct_change:pct_change.upper), ~ percent(.x, accuracy = 0.1))) %>%
  datatable()
NA
NA
What are the correlations between peak, level, and slope?
#RR.peak histogram
#The scale_fill_gradient() layer has been removed: fill is set as a
#constant outside aes(), so no fill scale was ever applied.
a <- overall_change$peak_draws %>%
  ggplot() +
  geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
  theme_ggdist() +
  theme(legend.position = "none",
        panel.border = element_rect(colour = "grey78", fill=NA)) +
  labs(x="RR.peak",
       y="")
#RR.level histogram
#Inert scale_fill_gradient() removed: no aesthetic maps to fill here.
b <- overall_change$level_draws %>%
  ggplot() +
  geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
  theme_ggdist() +
  theme(legend.position = "none",
        panel.border = element_rect(colour = "grey78", fill=NA)) +
  labs(x="RR.level",
       y="")
#RR.slope histogram, truncated to RR.slope in [0, 6]
#Inert scale_fill_gradient() removed: no aesthetic maps to fill here.
c <- overall_change$slope_draws %>%
  ggplot() +
  geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
  scale_x_continuous(limits = c(0, 6)) +
  theme_ggdist() +
  theme(legend.position = "none",
        panel.border = element_rect(colour = "grey78", fill=NA)) +
  labs(x="RR.slope",
       y="")
#Pairwise correlations of the posterior draws. pluck() called with a
#single argument is the identity function, so the redundant wrappers
#have been removed.
#Correlation between RR.peak and RR.level
cor_rr_peak_rr_level <- round(cor(overall_change$peak_draws$estimate, overall_change$level_draws$estimate), digits = 2)
#Correlation between RR.peak and RR.slope
cor_rr_peak_rr_slope <- round(cor(overall_change$peak_draws$estimate, overall_change$slope_draws$estimate), digits = 2)
#Correlation between RR.level and RR.slope
cor_rr_level_rr_slope <- round(cor(overall_change$level_draws$estimate, overall_change$slope_draws$estimate), digits = 2)
#plot of correlation between RR.peak and RR.level
#Redundant pluck() identity wrappers removed; the correlation label is
#drawn with annotate() so it renders once instead of once per draw row.
d <- bind_cols(RR.peak = overall_change$peak_draws$estimate,
               RR.level = overall_change$level_draws$estimate) %>%
  ggplot(aes(y=RR.peak, x = RR.level)) +
  geom_hex() +
  geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
  annotate("text", y=2.2, x=0.58, label=cor_rr_peak_rr_level, colour="firebrick") +
  scale_fill_gradient(high="lightblue1",low="darkblue") +
  theme_ggdist() +
  theme(legend.position = "none",
        panel.border = element_rect(colour = "grey78", fill=NA))
#plot of correlation between RR.peak and RR.slope
#Redundant pluck() wrappers removed; label drawn once via annotate().
#NOTE(review): unlike panel d, this smooth uses geom_smooth's default
#method rather than "lm" -- confirm that is intended.
e <- bind_cols(RR.peak = overall_change$peak_draws$estimate,
               RR.slope = overall_change$slope_draws$estimate) %>%
  ggplot(aes(y=RR.peak, x = RR.slope)) +
  geom_hex() +
  geom_smooth(se=FALSE, colour="firebrick") +
  annotate("text", y=2.1, x=0.5, label=cor_rr_peak_rr_slope, colour="firebrick") +
  scale_x_continuous(limits = c(0, 6)) +
  scale_fill_gradient(high="lightblue1",low="darkblue") +
  theme_ggdist() +
  theme(legend.position = "none",
        panel.border = element_rect(colour = "grey78", fill=NA))
#plot of correlation between RR.level and RR.slope
#Redundant pluck() wrappers removed; label drawn once via annotate().
#NOTE(review): this smooth uses geom_smooth's default method rather than
#"lm" as in panel d -- confirm that is intended.
f <- bind_cols(RR.level = overall_change$level_draws$estimate,
               RR.slope = overall_change$slope_draws$estimate) %>%
  ggplot(aes(y=RR.level, x = RR.slope)) +
  geom_hex() +
  geom_smooth(se=FALSE, colour="firebrick") +
  annotate("text", y=0.75, x=0.5, label=cor_rr_level_rr_slope, colour="firebrick") +
  scale_x_continuous(limits = c(0, 6)) +
  scale_fill_gradient(high="lightblue1",low="darkblue") +
  theme_ggdist() +
  theme(legend.position = "none",
        panel.border = element_rect(colour = "grey78", fill=NA))
#Arrange the three histograms (a-c) and three pairwise hex plots (d-f)
#into a 3x3 patchwork grid, with spacers filling the unused cells
(plot_spacer() + plot_spacer() + c) /
(plot_spacer() + b + f) /
(a + d + e)
ggsave(here("figures/pulmonary_cors.pdf"), width=8, height=8)
NA
NA
NA
Plot the counterfactual at ward level
#Ward-level counterfactual plot from the fitted model, including the ward
#random effects. A stray bare `ward` argument (previously after
#grouping_var) has been removed: it was silently absorbed by `...` and,
#if ever evaluated, would have errored on the undefined symbol.
plot_counterfactual(model_data = mdata, model=m_pulmonary, outcome = inc_100k, population_denominator = population_without_inst_ship,
                    grouping_var = ward, re_formula= ~(1 + y_num*acf_period | ward))
ggsave(here("figures/s4.png"), width=12, height=12)
Summary of change in notifications at ward level
#Peak/level/slope change summaries per ward, now including the ward
#random effects via re_formula
ward_change <- summarise_change(model_data=mdata, model=m_pulmonary,
population_denominator=population_without_inst_ship, grouping_var=ward,
re_formula = ~(1 + y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw', 'acf_period'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table; a lambda is used inside across() because
#passing extra arguments through `...` is deprecated in dplyr >= 1.1
ward_change %>%
  keep((names(.) %in% tokeep)) %>%
  bind_rows() %>%
  mutate(across(c(estimate:.upper), ~ number(.x, accuracy = 0.01))) %>%
  select(measure, everything()) %>%
  datatable()
NA
NA
Calculate the counterfactual per ward
#Counterfactual (cases averted) per ward, including the ward random
#effects via re_formula
ward_pulmonary_counterf <- calculate_counterfactual(model_data = mdata, model=m_pulmonary,
population_denominator = population_without_inst_ship,
grouping_var = ward, re_formula=~(1 + y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.Joining with `by = join_by(year, population_without_inst_ship, .draw, ward)`Joining with `by = join_by(.draw, ward)`
#Per-ward counterfactual comparison table. percent() is wrapped in a
#lambda because passing extra args through across()'s `...` is deprecated
#in dplyr >= 1.1 (the number_format() calls already return functions).
ward_pulmonary_counterf$counter_post %>%
  mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
  mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
  mutate(across(c(pct_change:pct_change.upper), ~ percent(.x, accuracy = 0.1))) %>%
  datatable()
NA
NA
Overall counterfactual per ward
#Overall counterfactual totals per ward. percent() wrapped in a lambda:
#passing extra args through across()'s `...` is deprecated in dplyr >= 1.1.
ward_pulmonary_counterf$counter_post_overall %>%
  mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
  mutate(across(c(pct_change:pct_change.upper), ~ percent(.x, accuracy = 0.1))) %>%
  datatable()
NA
Now we will model the extra-pulmonary TB notification rate. The negative binomial model struggled to converge for these sparser counts, so we revert to a Poisson likelihood.
#Extra-pulmonary notifications: Poisson likelihood (no shape parameter),
#with the same fixed- and random-effect structure and population offset
#as the pulmonary model.
#NOTE(review): the b prior here is normal(0, 0.01), tighter than the
#normal(0, 0.1) used for the pulmonary model -- confirm this is intended.
m_extrapulmonary <- brm(
cases ~ y_num + acf_period + y_num:acf_period + (y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata_extrapulmonary,
family = poisson(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0, 0.01), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(2), class=cor))
Compiling Stan program...
Start sampling
starting worker pid=54351 on localhost:11231 at 11:14:18.920
starting worker pid=54364 on localhost:11231 at 11:14:19.008
starting worker pid=54377 on localhost:11231 at 11:14:19.098
starting worker pid=54390 on localhost:11231 at 11:14:19.188
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000137 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.37 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.000153 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.53 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.000158 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.58 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000152 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.52 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 10.74 seconds (Warm-up)
Chain 3: 7.352 seconds (Sampling)
Chain 3: 18.092 seconds (Total)
Chain 3:
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 17.804 seconds (Warm-up)
Chain 2: 7.331 seconds (Sampling)
Chain 2: 25.135 seconds (Total)
Chain 2:
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 36.561 seconds (Warm-up)
Chain 4: 18.056 seconds (Sampling)
Chain 4: 54.617 seconds (Total)
Chain 4:
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 39.242 seconds (Warm-up)
Chain 1: 22.007 seconds (Sampling)
Chain 1: 61.249 seconds (Total)
Chain 1:
Warning: There were 3 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.Warning: Examine the pairs() plot to diagnose sampling problems
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
# Posterior summary for the extra-pulmonary model
# (check Rhat ~ 1.00 and adequate Bulk/Tail ESS before using the estimates)
summary(m_extrapulmonary)
Warning: There were 3 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
Family: poisson
Links: mu = log
Formula: cases ~ y_num + acf_period + y_num:acf_period + (y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata_extrapulmonary (Number of observations: 518)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Group-Level Effects:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.45 0.10 0.27 0.68 1.00 558 1196
sd(y_num) 0.07 0.02 0.04 0.11 1.00 1113 1391
sd(acf_periodb.acf) 0.12 0.09 0.00 0.34 1.00 1885 1771
sd(acf_periodc.postMacf) 2.43 0.42 1.70 3.35 1.00 2101 2677
sd(y_num:acf_periodb.acf) 0.02 0.01 0.00 0.05 1.00 1982 1754
sd(y_num:acf_periodc.postMacf) 0.25 0.04 0.17 0.35 1.00 1812 2601
cor(Intercept,y_num) -0.48 0.22 -0.81 0.04 1.00 537 1072
cor(Intercept,acf_periodb.acf) -0.08 0.33 -0.68 0.59 1.00 5222 2858
cor(y_num,acf_periodb.acf) 0.02 0.33 -0.60 0.64 1.00 4841 2729
cor(Intercept,acf_periodc.postMacf) 0.44 0.23 -0.08 0.81 1.00 434 1041
cor(y_num,acf_periodc.postMacf) -0.58 0.17 -0.85 -0.19 1.00 1211 2049
cor(acf_periodb.acf,acf_periodc.postMacf) -0.11 0.33 -0.71 0.54 1.00 781 2037
cor(Intercept,y_num:acf_periodb.acf) -0.09 0.33 -0.68 0.56 1.00 4928 2961
cor(y_num,y_num:acf_periodb.acf) 0.02 0.33 -0.60 0.65 1.00 4791 2871
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.08 0.34 -0.69 0.58 1.00 3271 3208
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) -0.12 0.33 -0.70 0.53 1.00 3998 3034
cor(Intercept,y_num:acf_periodc.postMacf) -0.44 0.23 -0.81 0.07 1.00 426 1015
cor(y_num,y_num:acf_periodc.postMacf) 0.52 0.18 0.11 0.82 1.00 1100 1698
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.13 0.33 -0.53 0.72 1.00 686 1998
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.98 0.01 -1.00 -0.95 1.00 2119 3010
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.13 0.33 -0.51 0.71 1.00 3593 3169
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -8.22 0.16 -8.56 -7.93 1.00 348 744
y_num -0.03 0.01 -0.05 -0.01 1.00 2097 2703
acf_periodb.acf -0.00 0.01 -0.02 0.02 1.00 7349 2986
acf_periodc.postMacf -0.00 0.01 -0.02 0.02 1.00 6517 3296
y_num:acf_periodb.acf -0.01 0.01 -0.02 0.01 1.00 5127 3181
y_num:acf_periodc.postMacf -0.02 0.01 -0.04 -0.00 1.00 2712 3181
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# MCMC trace/density plots for convergence diagnostics
plot(m_extrapulmonary)
# Posterior predictive check: observed vs simulated ECDFs
pp_check(m_extrapulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise the posterior predictions graphically.
# Observed vs counterfactual incidence, city-wide.
# re_formula = NA uses population-level effects only (ward random effects excluded).
plot_counterfactual(model_data = overall_scaffold, model=m_extrapulmonary,
population_denominator = population_without_inst_ship, outcome=inc_100k_extrapulmonary, re_formula = NA)
ggsave(here("figures/s6.png"), width=10)
Saving 10 x 4.51 in image
Summarise numerically.
# Numerical summaries of change (peak, level, slope) for the overall model,
# again marginalising over ward effects (re_formula = NA)
overall_change_extrapulmonary <- summarise_change(model_data=overall_scaffold, model=m_extrapulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
# Want to keep only the summary-estimate elements of the returned list
tokeep <- c("peak_summary", "level_summary", "slope_summary")
# Summary measures in a table, formatted to 2 dp.
# Passing `accuracy` through across()'s `...` is deprecated in dplyr >= 1.1,
# so build the formatter with scales::label_number() instead.
overall_change_extrapulmonary %>%
  keep(names(.) %in% tokeep) %>%
  bind_rows() %>%
  mutate(across(c(estimate:.upper), label_number(accuracy = 0.01))) %>%
  select(measure, everything()) %>%
  datatable()
NA
Numbers of extra-pulmonary TB cases averted overall.
# Numbers of extra-pulmonary TB cases averted vs the no-intervention counterfactual.
# NOTE(review): this call emitted a many-to-many join warning — verify inside
# calculate_counterfactual() that the join keys uniquely identify rows.
overall_ep_counterf <- calculate_counterfactual(model_data = mdata, model=m_extrapulmonary,
population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.Joining with `by = join_by(.draw)`
# Yearly cases averted / incidence differences, formatted for display.
# number_format()/percent-with-dots are superseded; use scales::label_*() builders.
overall_ep_counterf$counter_post %>%
  mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper),
                label_number(accuracy = 0.1, big.mark = ","))) %>%
  mutate(across(c(rr_inc100k:rr_inc100k.upper), label_number(accuracy = 0.01))) %>%
  mutate(across(c(pct_change:pct_change.upper), label_percent(accuracy = 0.1))) %>%
  datatable()
NA
Total extra-pulmonary TB cases averted between 1958 and 1963.
# Total cases averted over the whole post-ACF period, formatted for display.
# label_number()/label_percent() replace the superseded number_format() and the
# deprecated passing of `accuracy` through across()'s `...`.
overall_ep_counterf$counter_post_overall %>%
  mutate(across(c(cases_averted:cases_averted.upper), label_number(accuracy = 0.1, big.mark = ","))) %>%
  mutate(across(c(pct_change:pct_change.upper), label_percent(accuracy = 0.1))) %>%
  datatable()
NA
NA
Ward-level extra-pulmonary estimates in graphical form.
# Ward-level counterfactual plots, conditioning on ward random effects.
# NOTE(review): the bare trailing `ward` argument falls into plot_counterfactual()'s
# `...` — it looks redundant given grouping_var = ward; confirm whether it is
# intentional before removing.
plot_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary, outcome = inc_100k,
population_denominator = population_without_inst_ship, grouping_var = ward,re_formula =~(y_num*acf_period | ward),
ward)
ggsave(here("figures/s4.png"), width=10, height=12)
Numerical summary.
# Ward-level numerical summaries of change, conditioning on ward random effects
ward_change_extrapulmonary <- summarise_change(model_data = mdata_extrapulmonary, model = m_extrapulmonary,
population_denominator = population_without_inst_ship, grouping_var=ward,
re_formula = ~(y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw', 'acf_period'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
# Want to keep only the summary-estimate elements of the returned list
tokeep <- c("peak_summary", "level_summary", "slope_summary")
# Ward-level summary measures in a table; label_number() avoids the deprecated
# pass-through of `accuracy` via across()'s `...` (dplyr >= 1.1).
ward_change_extrapulmonary %>%
  keep(names(.) %in% tokeep) %>%
  bind_rows() %>%
  mutate(across(c(estimate:.upper), label_number(accuracy = 0.01))) %>%
  select(measure, everything()) %>%
  datatable()
NA
NA
NA
Fit the model
(Not rewritten the functions for this yet)
# Build the age/sex modelling data for pulmonary TB:
# label each year relative to the 1957 mass X-ray campaign, add a mid-year
# plotting position, and index years sequentially within each age/sex stratum.
mdata_age_sex <- cases_by_age_sex %>%
  filter(tb_type == "Pulmonary") %>%
  mutate(
    acf_period = case_when(
      year %in% 1950:1956 ~ "a. pre-acf",
      year == 1957 ~ "b. acf",
      year %in% 1958:1963 ~ "c. post-acf"
    ),
    year2 = year + 0.5
  ) %>%
  # Sequential year counter (1, 2, ...) within each age/sex stratum
  mutate(y_num = row_number(), .by = c(age, sex))
# Weakly-informative priors: gamma(1, 0.01) on the negative-binomial shape,
# normal(0, 1) on all regression coefficients
basic_prior <- c(prior(gamma(1, 0.01), class = shape),
prior(normal(0, 1), class = b))
# Negative-binomial model of case counts with period-specific levels and
# time slopes, each interacted with age*sex (no population offset here:
# modelling counts, not rates)
m_age_sex <- brm(
cases ~ y_num + (acf_period)*(age*sex) + (acf_period:y_num)*(age*sex),
data = mdata_age_sex,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = basic_prior)
Compiling Stan program...
Start sampling
starting worker pid=54529 on localhost:11231 at 11:16:17.674
starting worker pid=54542 on localhost:11231 at 11:16:17.767
starting worker pid=54555 on localhost:11231 at 11:16:17.860
starting worker pid=54568 on localhost:11231 at 11:16:17.952
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 3.1e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.31 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 3.4e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.34 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 3.3e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.33 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 3.6e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.36 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 7.56 seconds (Warm-up)
Chain 3: 8.11 seconds (Sampling)
Chain 3: 15.67 seconds (Total)
Chain 3:
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 7.684 seconds (Warm-up)
Chain 1: 8.421 seconds (Sampling)
Chain 1: 16.105 seconds (Total)
Chain 1:
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 7.451 seconds (Warm-up)
Chain 2: 8.595 seconds (Sampling)
Chain 2: 16.046 seconds (Total)
Chain 2:
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 7.648 seconds (Warm-up)
Chain 4: 8.67 seconds (Sampling)
Chain 4: 16.318 seconds (Total)
Chain 4:
# Posterior summary for the age/sex model (check Rhat and ESS)
summary(m_age_sex)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ y_num + (acf_period) * (age * sex) + (acf_period:y_num) * (age * sex)
Data: mdata_age_sex (Number of observations: 224)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 4.42 0.12 4.20 4.64 1.00 1343 2147
y_num -0.17 0.03 -0.23 -0.11 1.00 1252 2227
acf_periodb.acf -0.03 1.01 -2.01 1.94 1.00 6912 3214
acf_periodc.postMacf -0.49 0.33 -1.15 0.16 1.00 2345 2569
age06_15 0.63 0.15 0.33 0.92 1.00 1838 2707
age16_25 1.85 0.13 1.59 2.11 1.00 1585 2286
age26_35 1.13 0.14 0.87 1.40 1.00 1596 2498
age36_45 0.28 0.15 -0.02 0.58 1.00 1769 2775
age46_55 -0.62 0.18 -0.97 -0.27 1.00 2008 2581
age56_65 -1.08 0.20 -1.47 -0.69 1.00 2489 2473
age65P -1.64 0.22 -2.07 -1.19 1.00 2930 3037
sexM 0.16 0.15 -0.14 0.46 1.01 1214 2094
age06_15:sexM -0.43 0.21 -0.84 -0.02 1.00 1810 2552
age16_25:sexM -0.58 0.18 -0.94 -0.21 1.00 1517 2332
age26_35:sexM -0.33 0.19 -0.70 0.03 1.00 1508 2571
age36_45:sexM 0.24 0.20 -0.15 0.62 1.01 1634 2305
age46_55:sexM 1.15 0.22 0.73 1.57 1.00 1770 2627
age56_65:sexM 1.13 0.24 0.67 1.61 1.00 2044 2481
age65P:sexM 1.00 0.27 0.46 1.52 1.00 2621 2386
acf_periodb.acf:age06_15 0.02 0.99 -1.90 1.92 1.00 6941 3409
acf_periodc.postMacf:age06_15 -0.59 0.51 -1.59 0.42 1.00 3773 3033
acf_periodb.acf:age16_25 0.04 0.98 -1.86 1.92 1.00 8050 2469
acf_periodc.postMacf:age16_25 0.73 0.42 -0.07 1.55 1.00 2881 2749
acf_periodb.acf:age26_35 0.04 0.96 -1.81 1.89 1.00 6924 3129
acf_periodc.postMacf:age26_35 0.67 0.43 -0.17 1.50 1.00 3245 2773
acf_periodb.acf:age36_45 0.05 1.02 -2.00 2.07 1.00 6510 2705
acf_periodc.postMacf:age36_45 0.75 0.44 -0.11 1.63 1.00 3353 3352
acf_periodb.acf:age46_55 0.06 0.98 -1.87 1.99 1.00 6819 3242
acf_periodc.postMacf:age46_55 0.86 0.47 -0.06 1.76 1.00 3571 3272
acf_periodb.acf:age56_65 0.06 0.98 -1.85 1.94 1.00 7771 3249
acf_periodc.postMacf:age56_65 0.63 0.53 -0.42 1.65 1.00 4063 3040
acf_periodb.acf:age65P 0.04 0.97 -1.90 1.95 1.00 5912 3120
acf_periodc.postMacf:age65P 0.98 0.54 -0.06 2.04 1.00 4663 2946
acf_periodb.acf:sexM -0.00 0.97 -1.84 1.91 1.00 7446 3131
acf_periodc.postMacf:sexM -0.07 0.37 -0.79 0.65 1.00 2992 2737
y_num:acf_periodb.acf -0.10 0.13 -0.35 0.16 1.00 6282 3061
y_num:acf_periodc.postMacf 0.04 0.04 -0.03 0.12 1.00 1706 2258
acf_periodb.acf:age06_15:sexM 0.01 1.00 -1.95 1.98 1.00 7036 2936
acf_periodc.postMacf:age06_15:sexM -0.56 0.63 -1.77 0.69 1.00 5131 3091
acf_periodb.acf:age16_25:sexM 0.01 1.00 -1.95 1.99 1.00 7013 3319
acf_periodc.postMacf:age16_25:sexM 0.66 0.51 -0.36 1.63 1.00 4498 3216
acf_periodb.acf:age26_35:sexM 0.02 0.98 -1.90 1.95 1.00 7308 2890
acf_periodc.postMacf:age26_35:sexM 0.41 0.52 -0.59 1.46 1.00 3950 3227
acf_periodb.acf:age36_45:sexM 0.00 0.97 -1.87 1.96 1.00 6334 2937
acf_periodc.postMacf:age36_45:sexM 0.12 0.54 -0.96 1.19 1.00 4440 3052
acf_periodb.acf:age46_55:sexM -0.00 1.00 -1.96 1.99 1.00 8322 2969
acf_periodc.postMacf:age46_55:sexM 0.66 0.54 -0.39 1.73 1.00 4157 3202
acf_periodb.acf:age56_65:sexM 0.02 0.99 -1.89 1.97 1.00 7427 2653
acf_periodc.postMacf:age56_65:sexM 0.36 0.59 -0.80 1.54 1.00 4028 3010
acf_periodb.acf:age65P:sexM 0.01 1.01 -1.96 2.03 1.00 7489 3096
acf_periodc.postMacf:age65P:sexM 0.28 0.60 -0.89 1.42 1.00 4868 3141
y_num:acf_perioda.preMacf:age06_15 0.02 0.04 -0.05 0.10 1.00 1698 2949
y_num:acf_periodb.acf:age06_15 0.15 0.13 -0.11 0.40 1.00 6254 3314
y_num:acf_periodc.postMacf:age06_15 0.08 0.05 -0.01 0.17 1.00 3265 2929
y_num:acf_perioda.preMacf:age16_25 0.12 0.03 0.06 0.19 1.00 1394 2417
y_num:acf_periodb.acf:age16_25 0.25 0.13 -0.00 0.49 1.00 6965 2970
y_num:acf_periodc.postMacf:age16_25 -0.04 0.04 -0.12 0.04 1.00 2524 2532
y_num:acf_perioda.preMacf:age26_35 0.15 0.03 0.08 0.21 1.00 1481 2509
y_num:acf_periodb.acf:age26_35 0.31 0.13 0.06 0.56 1.00 6429 3105
y_num:acf_periodc.postMacf:age26_35 0.02 0.04 -0.06 0.09 1.00 2849 2826
y_num:acf_perioda.preMacf:age36_45 0.17 0.04 0.10 0.24 1.00 1469 2508
y_num:acf_periodb.acf:age36_45 0.40 0.13 0.14 0.66 1.00 6089 2463
y_num:acf_periodc.postMacf:age36_45 0.06 0.04 -0.02 0.14 1.00 2914 2935
y_num:acf_perioda.preMacf:age46_55 0.19 0.04 0.11 0.28 1.00 1725 2580
y_num:acf_periodb.acf:age46_55 0.44 0.13 0.18 0.70 1.00 6134 2966
y_num:acf_periodc.postMacf:age46_55 0.09 0.04 0.01 0.18 1.00 2990 3023
y_num:acf_perioda.preMacf:age56_65 0.18 0.05 0.08 0.27 1.00 2110 2815
y_num:acf_periodb.acf:age56_65 0.39 0.13 0.13 0.64 1.00 7193 3277
y_num:acf_periodc.postMacf:age56_65 0.11 0.05 0.01 0.20 1.00 3256 2690
y_num:acf_perioda.preMacf:age65P 0.23 0.05 0.13 0.33 1.00 2429 3011
y_num:acf_periodb.acf:age65P 0.43 0.13 0.18 0.68 1.00 5786 3216
y_num:acf_periodc.postMacf:age65P 0.11 0.05 0.01 0.20 1.00 3827 2943
y_num:acf_perioda.preMacf:sexM 0.02 0.04 -0.06 0.10 1.01 1197 2152
y_num:acf_periodb.acf:sexM -0.04 0.13 -0.30 0.22 1.00 6297 3212
y_num:acf_periodc.postMacf:sexM -0.01 0.04 -0.08 0.06 1.00 2052 2719
y_num:acf_perioda.preMacf:age06_15:sexM 0.00 0.05 -0.10 0.10 1.00 1716 2568
y_num:acf_periodb.acf:age06_15:sexM 0.04 0.14 -0.23 0.32 1.00 6886 3066
y_num:acf_periodc.postMacf:age06_15:sexM 0.10 0.06 -0.02 0.21 1.00 3444 3117
y_num:acf_perioda.preMacf:age16_25:sexM -0.00 0.05 -0.10 0.09 1.00 1425 2439
y_num:acf_periodb.acf:age16_25:sexM 0.06 0.14 -0.22 0.34 1.00 6452 3213
y_num:acf_periodc.postMacf:age16_25:sexM -0.01 0.05 -0.11 0.09 1.00 2893 3235
y_num:acf_perioda.preMacf:age26_35:sexM -0.01 0.05 -0.10 0.08 1.00 1444 2540
y_num:acf_periodb.acf:age26_35:sexM 0.05 0.14 -0.23 0.31 1.00 6098 3039
y_num:acf_periodc.postMacf:age26_35:sexM -0.01 0.05 -0.10 0.09 1.00 2708 2917
y_num:acf_perioda.preMacf:age36_45:sexM -0.01 0.05 -0.11 0.08 1.00 1513 2327
y_num:acf_periodb.acf:age36_45:sexM -0.00 0.14 -0.26 0.26 1.00 6187 2910
y_num:acf_periodc.postMacf:age36_45:sexM -0.00 0.05 -0.10 0.10 1.00 2945 3076
y_num:acf_perioda.preMacf:age46_55:sexM -0.01 0.05 -0.11 0.09 1.00 1610 2520
y_num:acf_periodb.acf:age46_55:sexM -0.00 0.14 -0.28 0.27 1.00 7308 2871
y_num:acf_periodc.postMacf:age46_55:sexM -0.07 0.05 -0.17 0.04 1.00 2922 2775
y_num:acf_perioda.preMacf:age56_65:sexM 0.05 0.06 -0.06 0.16 1.00 1811 1970
y_num:acf_periodb.acf:age56_65:sexM 0.07 0.14 -0.20 0.35 1.00 6632 2582
y_num:acf_periodc.postMacf:age56_65:sexM 0.01 0.06 -0.09 0.12 1.00 2976 2962
y_num:acf_perioda.preMacf:age65P:sexM 0.00 0.06 -0.12 0.13 1.00 2189 2105
y_num:acf_periodb.acf:age65P:sexM 0.08 0.14 -0.21 0.36 1.00 6788 3003
y_num:acf_periodc.postMacf:age65P:sexM 0.01 0.06 -0.10 0.12 1.00 3548 2704
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 221.49 75.06 115.49 404.66 1.00 2358 2635
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# MCMC trace/density diagnostics
plot(m_age_sex)
# Posterior predictive ECDF check
pp_check(m_age_sex, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise posterior
# Posterior draws of expected notifications, summarised by year/period/age/sex
age_sex_summary <- mdata_age_sex %>%
  select(year, year2, y_num, acf_period, age, sex) %>%
  add_epred_draws(m_age_sex) %>%
  group_by(year2, acf_period, age, sex) %>%
  mean_qi() %>%
  # "b. acf" intentionally maps to NA so plot layers break over the campaign year
  mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
                                acf_period=="c. post-acf" ~ "Post Intervention"))
# Create the counterfactual (no intervention), and summarise.
# Build newdata by overwriting acf_period on the observed scaffold — the same
# pattern used for the ward-level counterfactuals — instead of rebuilding the
# tibble column-by-column, which silently breaks if the scaffold changes.
age_sex_counterfact <-
  mdata_age_sex %>%
  select(year, year2, y_num, age, sex) %>%
  mutate(acf_period = factor("a. pre-acf")) %>%
  add_epred_draws(m_age_sex) %>%
  group_by(year2, acf_period, age, sex) %>%
  mean_qi() %>%
  mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
                                acf_period=="c. post-acf" ~ "Post Intervention")) %>%
  ungroup() %>%
  # Human-readable age-band and sex labels for plotting
  mutate(age = case_when(age=="00_05" ~ "0 to 5y",
                         age=="06_15" ~ "06 to 15y",
                         age=="16_25" ~ "16 to 25y",
                         age=="26_35" ~ "26 to 35y",
                         age=="36_45" ~ "36 to 45y",
                         age=="46_55" ~ "46 to 55y",
                         age=="56_65" ~ "56 to 65y",
                         age=="65+" ~ "65 & up y")) %>%
  mutate(sex = case_when(sex== "M" ~ "Male",
                         sex== "F" ~ "Female"))
# Faceted plot of observed and modelled notifications by age and sex, with the
# no-ACF counterfactual overlaid from 1956 onwards
age_sex_summary %>%
ungroup() %>%
# Human-readable age-band and sex labels
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) %>%
ggplot() +
# 95% interval ribbons for the fitted periods
geom_ribbon(aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
# Counterfactual ribbon and line, drawn from 1956 onwards only
geom_ribbon(data = age_sex_counterfact %>% filter(year>=1956),
aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = age_sex_counterfact %>% filter(year>=1956),
aes(y=.epred, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred, x=year2, group=acf_period, colour=acf_period)) +
# Observed case counts as points (same label recoding as above)
geom_point(data = mdata_age_sex %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) , aes(y=cases, x=year2, shape=acf_period), size=2) +
# Mark the end of the X-ray campaign period
geom_vline(aes(xintercept=acf_end), linetype=3) +
# Independent y-scales per facet (counts differ greatly by stratum)
ggh4x::facet_grid2(age~sex, scales = "free_y", independent = "y") +
theme_ggdist() +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="") +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="") +
scale_shape_discrete(name="") +
labs(
x = "Year",
y = "Case notifications (n)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA),
title = element_text(size=14),
axis.text = element_text(size=14),
legend.text = element_text(size=12)) +
guides(shape="none")
ggsave(here("figures/s7.png"), height=10)
Saving 7.29 x 10 in image
(This all needs tidying up and checking - to do!)
# Newdata: 1956 (pre-ACF) vs 1957 (ACF year) for every age/sex stratum
nd <- mdata_age_sex %>%
  filter(year %in% c(1956:1957)) %>%
  select(acf_period, y_num, age, sex)
# Posterior contrast of expected notifications, ACF year vs the year before:
# spread the two periods into columns, compute difference and rate ratio per
# draw, then summarise with mean and 95% interval.
age_sex_impact_out <-
  add_epred_draws(m_age_sex,
                  newdata=nd) %>%
  ungroup() %>%
  select(acf_period, .epred, age, sex) %>%
  pivot_wider(names_from = acf_period,
              values_from = .epred,
              values_fn = list) %>%
  # `cols` is required by tidyr >= 1.0 — name the two list-columns explicitly
  unnest(cols = c(`a. pre-acf`, `b. acf`)) %>%
  rename(pre_epred = 3,
         post_epred = 4) %>%
  mutate(acf_diff = post_epred-pre_epred,
         acf_rr = post_epred/pre_epred) %>%
  group_by(age, sex) %>%
  mean_qi(acf_diff, acf_rr)
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(`a. pre-acf`, `b. acf`)`.
# Formatted table of the 1957-vs-1956 contrasts.
# Ungroup first (mutate_if warned about ignored grouping variables) and use the
# non-superseded across(where(...)) in place of mutate_if().
age_sex_impact_out %>%
  ungroup() %>%
  mutate(across(where(is.double), ~ scales::number(x = .x, accuracy = 0.01, big.mark = ","))) %>%
  datatable()
`mutate_if()` ignored the following grouping variables:
# Figure 3a: relative notifications (ACF year 1957 vs 1956) by age and sex
f3a <- age_sex_impact_out %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
# Point = posterior mean rate ratio; bars = 95% interval; dodge separates sexes
geom_pointrange(aes(y=acf_rr, ymin=acf_rr.lower, ymax=acf_rr.upper, group=sex,
x=age,
colour = sex),
position = position_dodge(width = 0.25)) +
# Reference line at RR = 1 (no change)
geom_hline(aes(yintercept=1), linetype=2) +
scale_colour_manual(values = c("purple", "darkorange"), name="") +
labs(x="",
y="Relative notifications (95% UI)\nACF (1957) vs. Before ACF (1956)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
# Newdata: 1956 (pre-ACF) vs 1958 (first post-ACF year) for every age/sex stratum
nd <- mdata_age_sex %>%
  filter(year %in% c(1956,1958)) %>%
  select(acf_period, y_num, age, sex)
# Do it by calculating the contrast per draw, then summarising.
age_sex_impact2 <-add_epred_draws(m_age_sex,
                                  newdata=nd) %>%
  ungroup() %>%
  select(acf_period, .epred, age, sex) %>%
  pivot_wider(names_from = acf_period,
              values_from = .epred,
              values_fn = list) %>%
  # `cols` is required by tidyr >= 1.0 — name the two list-columns explicitly
  unnest(cols = c(`a. pre-acf`, `c. post-acf`)) %>%
  rename(pre_epred = 3,
         post_epred = 4) %>%
  mutate(acf_diff = post_epred-pre_epred,
         acf_rr = post_epred/pre_epred) %>%
  group_by(age, sex) %>%
  mean_qi(acf_diff, acf_rr)
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(`a. pre-acf`, `c. post-acf`)`.
# Formatted table of the 1958-vs-1956 contrasts.
# Ungroup first (mutate_if warned about ignored grouping variables) and use the
# non-superseded across(where(...)) in place of mutate_if().
age_sex_impact2 %>%
  ungroup() %>%
  mutate(across(where(is.double), ~ scales::number(x = .x, accuracy = 0.01, big.mark = ","))) %>%
  datatable()
`mutate_if()` ignored the following grouping variables:
# Figure 3b: relative notifications (first post-ACF year 1958 vs 1956) by age and sex
f3b <- age_sex_impact2 %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
# Point = posterior mean rate ratio; bars = 95% interval; dodge separates sexes
geom_pointrange(aes(y=acf_rr, ymin=acf_rr.lower, ymax=acf_rr.upper, group=sex,
x=age,
colour = sex),
position = position_dodge(width = 0.25)) +
# Reference line at RR = 1 (no change)
geom_hline(aes(yintercept=1), linetype=2) +
scale_colour_manual(values = c("purple", "darkorange"), name="") +
labs(x="",
y="Relative notifications (95% UI)\nACF (1958) vs. Before ACF (1956)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
# Percentage change in expected notifications across each period (overall and
# annualised), per age/sex stratum, excluding the ACF year itself.
age_sex_impact3 <- mdata_age_sex %>%
  select(year, year2, y_num, acf_period, cases, age, sex) %>%
  filter(year!=1957) %>%
  add_epred_draws(m_age_sex) %>%
  group_by(year, age, sex, acf_period) %>%
  mean_qi(.epred) %>%
  ungroup() %>%
  # Number of calendar years in each period, used to annualise the change.
  # NOTE(review): the original used length(year), which counts *rows* per
  # acf_period (years x age x sex strata), not years — n_distinct(year)
  # matches the variable's name and intent; confirm against published figures.
  mutate(n_years = n_distinct(year), .by = acf_period) %>%
  # first(n_years) keeps each expression length-1, so summarise() returns one
  # row per group (the multi-row summarise + distinct() pattern is deprecated)
  summarise(pct_change_epred_overall = (((last(.epred) - first(.epred))/first(.epred))),
            pct_change_lower_overall = (((last(.lower) - first(.lower))/first(.lower))),
            pct_change_upper_overall = (((last(.upper) - first(.upper))/first(.upper))),
            pct_change_epred_annual = (((last(.epred) - first(.epred))/first(.epred))/first(n_years)),
            pct_change_lower_annual = (((last(.lower) - first(.lower))/first(.lower))/first(n_years)),
            pct_change_upper_annual = (((last(.upper) - first(.upper))/first(.upper))/first(n_years)),
            .by = c(acf_period, age, sex))
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.
# Percentage-formatted table; across(where(...)) replaces the superseded mutate_if()
age_sex_impact3 %>%
  mutate(across(where(is.double), percent)) %>%
  datatable()
# Figure 3c: mean annual rate of change in notifications, pre- vs post-ACF,
# by age band, faceted by sex
f3c <- age_sex_impact3 %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
# Reference line at zero change
geom_hline(aes(yintercept=0), linetype=2) +
geom_pointrange(aes(y=pct_change_epred_annual, ymin=pct_change_lower_annual, ymax=pct_change_upper_annual, group=acf_period,
x=age,
colour = acf_period), size=0.1) +
scale_y_continuous(labels =percent) +
facet_grid(.~sex) +
# Flip so age bands read down the y-axis
coord_flip() +
scale_colour_manual(values = c("#DE0D92", "#4D6CFA")) +
labs(x="",
y="Mean annual rate of change in case notification rate (95% UI)\n Before ACF (1950-1956) vs. after ACF (1958-1963)",
colour="") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
# Print the figure
f3c
# Counterfactual draws (no ACF): force every year to the pre-ACF period.
# ungroup() before select() so add_epred_draws()'s grouping columns are not
# silently re-added (this was triggering the "Adding missing grouping
# variables" message); the downstream join and summary are unchanged.
counterfact_age_sex <-
  add_epred_draws(object = m_age_sex,
                  newdata = mdata_age_sex %>%
                    select(year, year2, y_num, age, sex) %>%
                    mutate(acf_period = "a. pre-acf")) %>%
  filter(year>1957) %>%
  ungroup() %>%
  select(year, age, sex, .draw, .epred_counterf = .epred)
Adding missing grouping variables: `year2`, `y_num`, `acf_period`, `.row`
#Calculate expected notifications per draw, then summarise.
# Fitted (with-ACF) draws for the post-intervention years, one row per
# year/age/sex/draw, to be joined against the counterfactual draws
post_change_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, age, sex, .draw, .epred)
#for the overall period
# Counterfactual totals 1958-1963: sum expected cases across years *within each
# draw*, so the posterior uncertainty of the summed quantity is preserved
counterfact_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(age, sex, .draw) %>%
summarise(.epred_counterf = sum(.epred)) %>%
mutate(year = "Overall (1958-1963)")
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row``summarise()` has grouped output by 'age', 'sex'. You can override using the `.groups` argument.
#Calculate incidence per draw, then summarise.
# Observed expected counts summed over the whole post-ACF period (1958-1963),
# per posterior draw and age/sex stratum.
post_change_overall_age_sex <-
  add_epred_draws(object = m_age_sex,
                  newdata = mdata_age_sex %>%
                    select(year, year2, y_num, age, sex, acf_period)) %>%
  filter(year > 1957) %>%
  # drop tidybayes' grouping so select() keeps only the named columns
  ungroup() %>%
  select(age, sex, .draw, .epred) %>%
  group_by(.draw, age, sex) %>%
  # sum over years within each draw; drop grouping explicitly
  summarise(.epred = sum(.epred), .groups = "drop")
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row``summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
# Per-year cases averted and percent change vs. counterfactual, summarised
# across posterior draws and displayed as an interactive table.
# Join keys are stated explicitly (previously implicit, emitting a message).
left_join(counterfact_age_sex, post_change_age_sex,
          by = join_by(year, age, sex, .draw)) %>%
  mutate(cases_averted = .epred_counterf - .epred,
         pct_change = (.epred - .epred_counterf) / .epred_counterf) %>%
  group_by(year, age, sex) %>%
  # posterior mean and 95% quantile interval per year/age/sex cell
  mean_qi(cases_averted, pct_change) %>%
  ungroup() %>%
  datatable()
Joining with `by = join_by(year, age, sex, .draw)`
# Overall (1958-1963) cases averted and percent change vs. counterfactual,
# summarised across posterior draws per age/sex stratum.
# Join keys are stated explicitly (previously implicit, emitting a message).
counter_post_overall_age_sex <-
  left_join(counterfact_overall_age_sex, post_change_overall_age_sex,
            by = join_by(age, sex, .draw)) %>%
  mutate(cases_averted = .epred_counterf - .epred,
         pct_change = (.epred - .epred_counterf) / .epred_counterf) %>%
  group_by(age, sex) %>%
  # posterior mean and 95% quantile interval per age/sex cell
  mean_qi(cases_averted, pct_change) %>%
  ungroup() %>%
  mutate(year = "Overall (1958-1963)")
Joining with `by = join_by(age, sex, .draw)`
# Format the overall estimates as "estimate\n(lower to upper)" strings for
# reporting. Uses lambdas inside across(): passing extra arguments through
# across()'s `...` (as in `across(cols, percent, accuracy = 0.1)`) is
# deprecated since dplyr 1.1.
age_sex_txt <- counter_post_overall_age_sex %>%
  mutate(across(cases_averted:cases_averted.upper,
                ~ number(.x, accuracy = 0.1, big.mark = ","))) %>%
  mutate(across(pct_change:pct_change.upper,
                ~ percent(.x, accuracy = 0.1))) %>%
  transmute(year = as.character(year),
            sex = sex,
            age = age,
            cases_averted = glue::glue("{cases_averted}\n({cases_averted.lower} to {cases_averted.upper})"),
            pct_change = glue::glue("{pct_change}\n({pct_change.lower} to {pct_change.upper})"))
age_sex_txt %>% datatable()
NA
NA
# Figure 3D: number of TB cases averted (1958-1963) by age group,
# faceted and coloured by sex (legend suppressed since facets label sex).
f3d <- counter_post_overall_age_sex %>%
  mutate(
    # human-readable labels for plotting
    sex = case_when(sex == "M" ~ "Male",
                    sex == "F" ~ "Female"),
    age = case_when(age == "00_05" ~ "0 to 5y",
                    age == "06_15" ~ "06 to 15y",
                    age == "16_25" ~ "16 to 25y",
                    age == "26_35" ~ "26 to 35y",
                    age == "36_45" ~ "36 to 45y",
                    age == "46_55" ~ "46 to 55y",
                    age == "56_65" ~ "56 to 65y",
                    age == "65+" ~ "65 & up y")
  ) %>%
  ggplot(aes(x = age, y = cases_averted,
             ymin = cases_averted.lower, ymax = cases_averted.upper,
             colour = sex)) +
  geom_pointrange() +
  facet_grid(. ~ sex) +
  coord_flip() +
  scale_colour_manual(values = c("purple", "darkorange"), name = "") +
  scale_y_continuous(labels = comma) +
  labs(x = "",
       y = "Number (95% UI) of TB cases averted (1958-1963)",
       colour = "") +
  theme_ggdist() +
  theme(panel.border = element_rect(colour = "grey78", fill = NA),
        legend.position = "none")
f3d
Join together for Figure 3.
# Assemble the four panels into Figure 3 (patchwork) with A-D tags, then save.
# Height is pinned explicitly: ggsave() with only width falls back to the
# current device height (previously 4.51 in), making the output depend on the
# session's graphics device rather than the script.
(f3a + f3b) / (f3c + f3d) + plot_annotation(tag_levels = "A")
ggsave(here("figures/f3.png"), width = 12, height = 4.51)
Saving 12 x 4.51 in image
mdata_age_sex